diff options
author | murphy <murphy@rubychan.de> | 2005-10-29 14:49:23 +0000 |
---|---|---|
committer | murphy <murphy@rubychan.de> | 2005-10-29 14:49:23 +0000 |
commit | 382685e66f662d8bfb760099ec5e394f15db0d3c (patch) | |
tree | 56c395136b35fa36a0589ecb04984b7d3a7b0eff /bench | |
parent | 84199b89843d0eca9a85b66dbeae9fd10fdc7d63 (diff) | |
download | coderay-382685e66f662d8bfb760099ec5e394f15db0d3c.tar.gz |
Added the good old benchmark suite to the repository.
Diffstat (limited to 'bench')
-rw-r--r-- | bench/bench.rb | 159 | ||||
-rw-r--r-- | bench/example.c | 92649 | ||||
-rw-r--r-- | bench/example.cpp | 13544 | ||||
-rw-r--r-- | bench/example.delphi | 2708 | ||||
-rw-r--r-- | bench/example.dump | bin | 0 -> 218507 bytes | |||
-rw-r--r-- | bench/example.plain | 201 | ||||
-rw-r--r-- | bench/example.rb | 10070 | ||||
-rw-r--r-- | bench/example.ruby | 10070 | ||||
-rw-r--r-- | bench/example.rubyfast | 10428 | ||||
-rw-r--r-- | bench/example.rubylex | bin | 0 -> 284360 bytes | |||
-rw-r--r-- | bench/strange.c | 90 | ||||
-rw-r--r-- | bench/strange.ruby | 328 |
12 files changed, 140247 insertions, 0 deletions
diff --git a/bench/bench.rb b/bench/bench.rb new file mode 100644 index 0000000..f705258 --- /dev/null +++ b/bench/bench.rb @@ -0,0 +1,159 @@ +# The most ugly test script I've ever written! +# Shame on me! + +require 'profile' if ARGV.include? '-p' +require 'coderay' + +@size = ARGV.fetch(2, 100).to_i * 2**10 # 2**10 = 1 Ki + +lang = ARGV.fetch(0) do + puts <<-HELP +Usage: + ruby bench.rb (c|ruby|dump) (null|text|tokens|count|statistic|yaml|html) [SIZE in KB] [stream] + + SIZE defaults to 100. + SIZE = 0 means the whole input. + SIZE is ignored when dump is input. + +-p generates a profile (slow! use with SIZE = 1) +-o shows the output +stream enabled streaming mode + +Sorry for the strange interface. I will improve it in the next release. + HELP + exit +end + +format = ARGV.fetch(1, 'html').downcase + +$stream = ARGV.include? 'stream' +$optimize = ARGV.include? 'opt' +$style = ARGV.include? 'style' + +require 'benchmark' +require 'fileutils' + +if format == 'comp' + format = 'html' + compare = true + begin + require 'syntax' + require 'syntax/convertors/html.rb' + rescue LoadError + raise 'This requires Syntax! (Try % gem install syntax)' + end +end + +$dump_input = lang == 'dump' +$dump_output = format == 'dump' +require 'coderay/helpers/gzip_simple.rb' if $dump_input + +MYDIR = File.dirname __FILE__ +def here fn = nil + return MYDIR unless fn + File.join here, fn +end + +n = ARGV.find { |a| a[/^N/] } +N = if n then n[/\d+/].to_i else 1 end +o = ARGV.find { |a| a[/^O/] } +Offset = if o then o[/\d+/].to_i else 1 end +b = ARGV.find { |a| a[/^B/] } +BoldEvery = if b then b[/\d+/].to_i else 10 end +$filename = ARGV.include?('strange') ? 'strange' : 'example' + +Benchmark.bm(20) do |bm| + + data = nil + File.open(here("#$filename." + lang), 'rb') { |f| data = f.read } + if $dump_input + @size = CodeRay::Tokens.load(data).text_size + else + unless @size.zero? 
+ data += data until data.size >= @size + data = data[0, @size] + end + @size = data.size + end + + time = bm.report('CodeRay') do + options = { :tab_width => 2, :line_numbers => :table, :line_numbers_offset => Offset, :bold_every => BoldEvery, :wrap => :page, :css => $style ? :style : :class} + options[:debug] = $DEBUG + $hl = CodeRay.encoder(format, options) unless $dump_output + N.times do + if $stream + if $dump_input + raise 'Can\'t stream dump.' + elsif $dump_output + raise 'Can\'t dump stream.' + end + $o = $hl.encode_stream(data, lang, options) + @token_count = $hl.token_stream.size + else + if $dump_input + tokens = CodeRay::Tokens.load data + else + tokens = CodeRay.scan(data, lang) + @token_count = tokens.size + end + @token_count = tokens.size + tokens.optimize! if $optimize + if $dump_output + $o = tokens.optimize.dump + else + $o = tokens.encode($hl) + end + end + end + $file_created = 'test.' + format + file = here($file_created) + File.open(file, 'wb') do |f| + f.write $o + end + end + Dir.chdir(here) do + FileUtils.copy 'test.dump', 'example.dump' if $dump_output + end + + time_real = time.real / N + + puts "\t%7.2f KB/sec (%d.%d KB)\t%0.2f KTok/sec" % [((@size / 1024.0) / time_real), @size / 1024, @size % 1024, ((@token_count / 1000.0) / time_real)] + puts $o if ARGV.include? '-o' + + if compare + time = bm.report('Syntax') do + c = Syntax::Convertors::HTML.for_syntax 'ruby' + Dir.chdir(here) do + File.open('test.syntax.' + format, 'wb') do |f| + f.write '<html><head><style>%s</style></head><body><div class="ruby">%s</div></body></html>' % [DATA.read, c.convert(data)] + end + end + $file_created << " and test.syntax.#{format}" + end + puts "\t%7.2f KB/sec" % ((@size / 1024.0) / time.real) + end + +end +puts "Files created: #$file_created" + +STDIN.gets if ARGV.include? 
'wait' + +__END__ +.ruby .normal {} +.ruby .comment { color: #005; font-style: italic; } +.ruby .keyword { color: #A00; font-weight: bold; } +.ruby .method { color: #077; } +.ruby .class { color: #074; } +.ruby .module { color: #050; } +.ruby .punct { color: #447; font-weight: bold; } +.ruby .symbol { color: #099; } +.ruby .string { color: #944; background: #FFE; } +.ruby .char { color: #F07; } +.ruby .ident { color: #004; } +.ruby .constant { color: #07F; } +.ruby .regex { color: #B66; background: #FEF; } +.ruby .number { color: #F99; } +.ruby .attribute { color: #7BB; } +.ruby .global { color: #7FB; } +.ruby .expr { color: #227; } +.ruby .escape { color: #277; } diff --git a/bench/example.c b/bench/example.c new file mode 100644 index 0000000..3a5b4b1 --- /dev/null +++ b/bench/example.c @@ -0,0 +1,92649 @@ +#include <string.h> +#include <stdlib.h> +#include <stdio.h> +#include "codegen.h" +#include "symboltable.h" +#include "stringbuffer.h" + +extern void yyerror(char* msg); + +static stringBuffer* staticVariableBuffer; +static stringBuffer* classInitBuffer; +static stringBuffer* currentMethodBuffer; +static stringBuffer* finishedMethodsBuffer; +static stringBuffer* mainBuffer; + +static int currentMethodBufferIndex; +static int currentMethodStackSize; +static int currentMethodStackSizeMax; +static int currentMethodNumberOfLocals; + +static int classInitBufferIndex; +static int classInitStackSize; +static int classInitStackSizeMax; + +static int labelCounter = 0; +static int global = 1; + +char tempString[MAX_LENGTH_OF_COMMAND]; + +extern char* className; /* from minako-syntax.y */ + +/* forward declarations */ +static void increaseStackby(int stackdiff); +char convertType(int type); + +void codegenInit() { + staticVariableBuffer = newStringBuffer(); + classInitBuffer = newStringBuffer(); + currentMethodBuffer = 0; + finishedMethodsBuffer = newStringBuffer(); + mainBuffer = newStringBuffer(); + + stringBufferAppend(mainBuffer, "; ------- Header 
--------------------------------------------"); + sprintf(tempString, ".class public synchronized %s", className); + stringBufferAppend(mainBuffer, tempString); + stringBufferAppend(mainBuffer, ".super java/lang/Object"); + stringBufferAppend(mainBuffer, "; -----------------------------------------------------------"); + stringBufferAppend(mainBuffer, ""); + + stringBufferAppend(finishedMethodsBuffer, "; ------- Constructor ---------------------------------------"); + stringBufferAppend(finishedMethodsBuffer, ".method public <init>()V"); + stringBufferAppend(finishedMethodsBuffer, "\t.limit stack 1"); + stringBufferAppend(finishedMethodsBuffer, "\t.limit locals 1"); + stringBufferAppend(finishedMethodsBuffer, "\taload_0"); + stringBufferAppend(finishedMethodsBuffer, "\tinvokenonvirtual java/lang/Object/<init>()V"); + stringBufferAppend(finishedMethodsBuffer, "\treturn"); + stringBufferAppend(finishedMethodsBuffer, ".end method"); + stringBufferAppend(finishedMethodsBuffer, "; -----------------------------------------------------------"); + stringBufferAppend(finishedMethodsBuffer, ""); + + stringBufferAppend(staticVariableBuffer, "; ------- Class Variables -----------------------------------"); + + stringBufferAppend(classInitBuffer, "; ------- Class Initializer ---------------------------------"); + stringBufferAppend(classInitBuffer, ".method static <clinit>()V"); + classInitBufferIndex = classInitBuffer->numberOfNextElement; + stringBufferAppend(classInitBuffer, "\t.limit locals 0"); + +} + +void codegenAppendCommand(char* cmd, int stackdiff) { + char tempString[MAX_LENGTH_OF_COMMAND]; + sprintf(tempString, "\t%s", cmd); + if (global) stringBufferAppend(classInitBuffer, tempString); + else stringBufferAppend(currentMethodBuffer, tempString); + increaseStackby(stackdiff); +} + +void codegenInsertCommand(int address, char* cmd, int stackdiff) { + char tempString[MAX_LENGTH_OF_COMMAND]; + sprintf(tempString, "\t%s", cmd); + if (global) 
stringBufferInsert(classInitBuffer, address, tempString); + else stringBufferInsert(currentMethodBuffer, address, tempString); + increaseStackby(stackdiff); +} + +void codegenAppendLabel(int label) { + char tempString[MAX_LENGTH_OF_COMMAND]; + sprintf(tempString, "Label%d:", label); + if (global) stringBufferAppend(classInitBuffer, tempString); + else stringBufferAppend(currentMethodBuffer, tempString); +} + +void codegenAddVariable(char* name, int type) { + /*fprintf(stderr, "add variable %s(%d) global=%d ", name, convertType(type), global);*/ + if (global) { + if (type == TYPE_INT) sprintf(tempString, ".field static %s %c", name, 'I'); + else if (type == TYPE_FLOAT) sprintf(tempString, ".field static %s %c", name, 'F'); + else if (type == TYPE_BOOLEAN) sprintf(tempString, ".field static %s %c", name, 'Z'); + else yyerror("compiler-intern error in codegenAddGlobalVariable().\n"); + stringBufferAppend(staticVariableBuffer, tempString); + } + else { + currentMethodNumberOfLocals++; + } +} + +int codegenGetNextLabel() { + return labelCounter++; +} + +int codegenGetCurrentAddress() { + if (global) return classInitBuffer->numberOfNextElement; + else return currentMethodBuffer->numberOfNextElement; +} + +void codegenEnterFunction(symtabEntry* entry) { + currentMethodBuffer = newStringBuffer(); + currentMethodStackSize = 0; + currentMethodStackSizeMax = 0; + labelCounter = 1; + global = 0; + + if (strcmp(entry->name, "main") == 0) { + if (entry->idtype != TYPE_VOID) yyerror("main has to be void.\n"); + currentMethodNumberOfLocals = 1; + symtabInsert(strdup("#main-param#"), TYPE_VOID, CLASS_FUNC); + stringBufferAppend(currentMethodBuffer, "; ------- Methode ---- void main() --------------------------"); + stringBufferAppend(currentMethodBuffer, ".method public static main([Ljava/lang/String;)V"); + } + else { + int i; + currentMethodNumberOfLocals = entry->paramIndex; + stringBufferAppend(currentMethodBuffer, "; ------- Methode 
-------------------------------------------"); + sprintf(tempString, ".method public static %s(", entry->name); + for (i=entry->paramIndex-1; i>=0; i--) { + int type = entry->params[i]->idtype; + tempString[strlen(tempString)+1] = 0; + tempString[strlen(tempString)] = convertType(type); + } + tempString[strlen(tempString)+2] = 0; + tempString[strlen(tempString)+1] = convertType(entry->idtype); + tempString[strlen(tempString)] = ')'; + stringBufferAppend(currentMethodBuffer, tempString); + } + currentMethodBufferIndex = currentMethodBuffer->numberOfNextElement; +} + +void codegenLeaveFunction() { + global = 1; + sprintf(tempString, "\t.limit locals %d", currentMethodNumberOfLocals); + stringBufferInsert(currentMethodBuffer, currentMethodBufferIndex, tempString); + sprintf(tempString, "\t.limit stack %d", currentMethodStackSizeMax); + stringBufferInsert(currentMethodBuffer, currentMethodBufferIndex, tempString); + stringBufferAppend(currentMethodBuffer, "\treturn"); + stringBufferAppend(currentMethodBuffer, ".end method"); + stringBufferAppend(currentMethodBuffer, "; -----------------------------------------------------------"); + stringBufferAppend(currentMethodBuffer, ""); + + stringBufferConcatenate(finishedMethodsBuffer, currentMethodBuffer); +} + + + +void codegenFinishCode() { + stringBufferAppend(staticVariableBuffer, "; -----------------------------------------------------------"); + stringBufferAppend(staticVariableBuffer, ""); + + sprintf(tempString, "\t.limit stack %d", classInitStackSizeMax); + stringBufferInsert(classInitBuffer, classInitBufferIndex, tempString); + stringBufferAppend(classInitBuffer, "\treturn"); + stringBufferAppend(classInitBuffer, ".end method"); + stringBufferAppend(classInitBuffer, "; -----------------------------------------------------------"); + + stringBufferConcatenate(mainBuffer, staticVariableBuffer); + stringBufferConcatenate(mainBuffer, finishedMethodsBuffer); + stringBufferConcatenate(mainBuffer, classInitBuffer); + + 
stringBufferPrint(mainBuffer); +} + +static void increaseStackby(int stackdiff) { + if (global) { + classInitStackSize += stackdiff; + if (classInitStackSize > classInitStackSizeMax) classInitStackSizeMax = classInitStackSize; + } + else { + currentMethodStackSize += stackdiff; + if (currentMethodStackSize > currentMethodStackSizeMax) currentMethodStackSizeMax = currentMethodStackSize; + } +} + +char convertType(int type) { + switch(type) { + case TYPE_VOID: return 'V'; + case TYPE_INT: return 'I'; + case TYPE_FLOAT: return 'F'; + case TYPE_BOOLEAN: return 'Z'; + default: yyerror("compiler-intern error in convertType().\n"); + } + return 0; /* to avoid compiler-warning */ +} + + +//#include <stdlib.h> +//#include <stdio.h> + +int main() { + int a = 12, b = 44; + while (a != b) { + if (a > b) + a -= b; + else + b -= a; + } + printf("%d\n%d", a, 0X0);\ +} + + +/********************************************************************** + + array.c - + + $Author: matz $ + $Date: 2005/03/04 06:47:45 $ + created at: Fri Aug 6 09:46:12 JST 1993 + + Copyright (C) 1993-2003 Yukihiro Matsumoto + Copyright (C) 2000 Network Applied Communication Laboratory, Inc. 
+ Copyright (C) 2000 Information-technology Promotion Agency, Japan + +**********************************************************************/ + +#include "ruby.h" +#include "util.h" +#include "st.h" +#include "node.h" + +VALUE rb_cArray, rb_cValues; + +static ID id_cmp; + +#define ARY_DEFAULT_SIZE 16 + + +void +rb_mem_clear(mem, size) + register VALUE *mem; + register long size; +{ + while (size--) { + *mem++ = Qnil; + } +} + +static inline void +memfill(mem, size, val) + register VALUE *mem; + register long size; + register VALUE val; +{ + while (size--) { + *mem++ = val; + } +} + +#define ARY_TMPLOCK FL_USER1 + +static inline void +rb_ary_modify_check(ary) + VALUE ary; +{ + if (OBJ_FROZEN(ary)) rb_error_frozen("array"); + if (FL_TEST(ary, ARY_TMPLOCK)) + rb_raise(rb_eRuntimeError, "can't modify array during iteration"); + if (!OBJ_TAINTED(ary) && rb_safe_level() >= 4) + rb_raise(rb_eSecurityError, "Insecure: can't modify array"); +} + +static void +rb_ary_modify(ary) + VALUE ary; +{ + VALUE *ptr; + + rb_ary_modify_check(ary); + if (FL_TEST(ary, ELTS_SHARED)) { + ptr = ALLOC_N(VALUE, RARRAY(ary)->len); + FL_UNSET(ary, ELTS_SHARED); + RARRAY(ary)->aux.capa = RARRAY(ary)->len; + MEMCPY(ptr, RARRAY(ary)->ptr, VALUE, RARRAY(ary)->len); + RARRAY(ary)->ptr = ptr; + } +} + +VALUE +rb_ary_freeze(ary) + VALUE ary; +{ + return rb_obj_freeze(ary); +} + +/* + * call-seq: + * array.frozen? -> true or false + * + * Return <code>true</code> if this array is frozen (or temporarily frozen + * while being sorted). 
+ */ + +static VALUE +rb_ary_frozen_p(ary) + VALUE ary; +{ + if (OBJ_FROZEN(ary)) return Qtrue; + if (FL_TEST(ary, ARY_TMPLOCK)) return Qtrue; + return Qfalse; +} + +static VALUE ary_alloc(VALUE); +static VALUE +ary_alloc(klass) + VALUE klass; +{ + NEWOBJ(ary, struct RArray); + OBJSETUP(ary, klass, T_ARRAY); + + ary->len = 0; + ary->ptr = 0; + ary->aux.capa = 0; + + return (VALUE)ary; +} + +static VALUE +ary_new(klass, len) + VALUE klass; + long len; +{ + VALUE ary; + + if (len < 0) { + rb_raise(rb_eArgError, "negative array size (or size too big)"); + } + if (len > 0 && len * sizeof(VALUE) <= len) { + rb_raise(rb_eArgError, "array size too big"); + } + if (len == 0) len++; + + ary = ary_alloc(klass); + RARRAY(ary)->ptr = ALLOC_N(VALUE, len); + RARRAY(ary)->aux.capa = len; + + return ary; +} + +VALUE +rb_ary_new2(len) + long len; +{ + return ary_new(rb_cArray, len); +} + + +VALUE +rb_ary_new() +{ + return rb_ary_new2(ARY_DEFAULT_SIZE); +} + +#ifdef HAVE_STDARG_PROTOTYPES +#include <stdarg.h> +#define va_init_list(a,b) va_start(a,b) +#else +#include <varargs.h> +#define va_init_list(a,b) va_start(a) +#endif + +VALUE +#ifdef HAVE_STDARG_PROTOTYPES +rb_ary_new3(long n, ...) +#else +rb_ary_new3(n, va_alist) + long n; + va_dcl +#endif +{ + va_list ar; + VALUE ary; + long i; + + ary = rb_ary_new2(n); + + va_init_list(ar, n); + for (i=0; i<n; i++) { + RARRAY(ary)->ptr[i] = va_arg(ar, VALUE); + } + va_end(ar); + + RARRAY(ary)->len = n; + return ary; +} + +VALUE +rb_ary_new4(n, elts) + long n; + const VALUE *elts; +{ + VALUE ary; + + ary = rb_ary_new2(n); + if (n > 0 && elts) { + MEMCPY(RARRAY(ary)->ptr, elts, VALUE, n); + } + RARRAY(ary)->len = n; + + return ary; +} + +VALUE +#ifdef HAVE_STDARG_PROTOTYPES +rb_values_new(long n, ...) 
+#else +rb_values_new(n, va_alist) + long n; + va_dcl +#endif +{ + va_list ar; + VALUE val; + long i; + + val = ary_new(rb_cValues, n); + va_init_list(ar, n); + for (i=0; i<n; i++) { + RARRAY(val)->ptr[i] = va_arg(ar, VALUE); + } + va_end(ar); + RARRAY(val)->len = n; + + return val; +} + +VALUE +rb_values_new2(n, elts) + long n; + const VALUE *elts; +{ + VALUE val; + + val = ary_new(rb_cValues, n); + if (n > 0 && elts) { + RARRAY(val)->len = n; + MEMCPY(RARRAY(val)->ptr, elts, VALUE, n); + } + + return val; +} + +static VALUE +ary_make_shared(ary) + VALUE ary; +{ + if (!FL_TEST(ary, ELTS_SHARED)) { + NEWOBJ(shared, struct RArray); + OBJSETUP(shared, rb_cArray, T_ARRAY); + + shared->len = RARRAY(ary)->len; + shared->ptr = RARRAY(ary)->ptr; + shared->aux.capa = RARRAY(ary)->aux.capa; + RARRAY(ary)->aux.shared = (VALUE)shared; + FL_SET(ary, ELTS_SHARED); + OBJ_FREEZE(shared); + return (VALUE)shared; + } + else { + return RARRAY(ary)->aux.shared; + } +} + +static VALUE +ary_shared_array(klass, ary) + VALUE klass, ary; +{ + VALUE val = ary_alloc(klass); + + ary_make_shared(ary); + RARRAY(val)->ptr = RARRAY(ary)->ptr; + RARRAY(val)->len = RARRAY(ary)->len; + RARRAY(val)->aux.shared = RARRAY(ary)->aux.shared; + FL_SET(val, ELTS_SHARED); + return val; +} + +VALUE +rb_values_from_ary(ary) + VALUE ary; +{ + return ary_shared_array(rb_cValues, ary); +} + +VALUE +rb_ary_from_values(val) + VALUE val; +{ + return ary_shared_array(rb_cArray, val); +} + +VALUE +rb_assoc_new(car, cdr) + VALUE car, cdr; +{ + return rb_values_new(2, car, cdr); +} + +static VALUE +to_ary(ary) + VALUE ary; +{ + return rb_convert_type(ary, T_ARRAY, "Array", "to_ary"); +} + +static VALUE +to_a(ary) + VALUE ary; +{ + return rb_convert_type(ary, T_ARRAY, "Array", "to_a"); +} + +VALUE +rb_check_array_type(ary) + VALUE ary; +{ + return rb_check_convert_type(ary, T_ARRAY, "Array", "to_ary"); +} + +static VALUE rb_ary_replace _((VALUE, VALUE)); + +/* + * call-seq: + * Array.new(size=0, obj=nil) + * 
Array.new(array) + * Array.new(size) {|index| block } + * + * Returns a new array. In the first form, the new array is + * empty. In the second it is created with _size_ copies of _obj_ + * (that is, _size_ references to the same + * _obj_). The third form creates a copy of the array + * passed as a parameter (the array is generated by calling + * to_ary on the parameter). In the last form, an array + * of the given size is created. Each element in this array is + * calculated by passing the element's index to the given block and + * storing the return value. + * + * Array.new + * Array.new(2) + * Array.new(5, "A") + * + * # only one copy of the object is created + * a = Array.new(2, Hash.new) + * a[0]['cat'] = 'feline' + * a + * a[1]['cat'] = 'Felix' + * a + * + * # here multiple copies are created + * a = Array.new(2) { Hash.new } + * a[0]['cat'] = 'feline' + * a + * + * squares = Array.new(5) {|i| i*i} + * squares + * + * copy = Array.new(squares) + */ + +static VALUE +rb_ary_initialize(argc, argv, ary) + int argc; + VALUE *argv; + VALUE ary; +{ + long len; + VALUE size, val; + + if (rb_scan_args(argc, argv, "02", &size, &val) == 0) { + RARRAY(ary)->len = 0; + if (rb_block_given_p()) { + rb_warning("given block not used"); + } + return ary; + } + + if (argc == 1 && !FIXNUM_P(size)) { + val = rb_check_array_type(size); + if (!NIL_P(val)) { + rb_ary_replace(ary, val); + return ary; + } + } + + len = NUM2LONG(size); + if (len < 0) { + rb_raise(rb_eArgError, "negative array size"); + } + if (len > 0 && len * (long)sizeof(VALUE) <= len) { + rb_raise(rb_eArgError, "array size too big"); + } + rb_ary_modify(ary); + if (len > RARRAY(ary)->aux.capa) { + REALLOC_N(RARRAY(ary)->ptr, VALUE, len); + RARRAY(ary)->aux.capa = len; + } + if (rb_block_given_p()) { + long i; + + if (argc == 2) { + rb_warn("block supersedes default value argument"); + } + for (i=0; i<len; i++) { + rb_ary_store(ary, i, rb_yield(LONG2NUM(i))); + RARRAY(ary)->len = i + 1; + } + } + else { + 
memfill(RARRAY(ary)->ptr, len, val); + RARRAY(ary)->len = len; + } + + return ary; +} + + +/* +* Returns a new array populated with the given objects. +* +* Array.[]( 1, 'a', /^A/ ) +* Array[ 1, 'a', /^A/ ] +* [ 1, 'a', /^A/ ] +*/ + +static VALUE +rb_ary_s_create(argc, argv, klass) + int argc; + VALUE *argv; + VALUE klass; +{ + VALUE ary = ary_alloc(klass); + + if (argc > 0) { + RARRAY(ary)->ptr = ALLOC_N(VALUE, argc); + MEMCPY(RARRAY(ary)->ptr, argv, VALUE, argc); + } + RARRAY(ary)->len = RARRAY(ary)->aux.capa = argc; + + return ary; +} + +void +rb_ary_store(ary, idx, val) + VALUE ary; + long idx; + VALUE val; +{ + if (idx < 0) { + idx += RARRAY(ary)->len; + if (idx < 0) { + rb_raise(rb_eIndexError, "index %ld out of array", + idx - RARRAY(ary)->len); + } + } + + rb_ary_modify(ary); + if (idx >= RARRAY(ary)->aux.capa) { + long new_capa = RARRAY(ary)->aux.capa / 2; + + if (new_capa < ARY_DEFAULT_SIZE) { + new_capa = ARY_DEFAULT_SIZE; + } + new_capa += idx; + if (new_capa * (long)sizeof(VALUE) <= new_capa) { + rb_raise(rb_eArgError, "index too big"); + } + REALLOC_N(RARRAY(ary)->ptr, VALUE, new_capa); + RARRAY(ary)->aux.capa = new_capa; + } + if (idx > RARRAY(ary)->len) { + rb_mem_clear(RARRAY(ary)->ptr + RARRAY(ary)->len, + idx-RARRAY(ary)->len + 1); + } + + if (idx >= RARRAY(ary)->len) { + RARRAY(ary)->len = idx + 1; + } + RARRAY(ary)->ptr[idx] = val; +} + +static VALUE +ary_shared_first(argc, argv, ary) + int argc; + VALUE *argv; + VALUE ary; +{ + VALUE nv, result; + long n; + + rb_scan_args(argc, argv, "1", &nv); + n = NUM2LONG(nv); + if (n > RARRAY(ary)->len) { + n = RARRAY(ary)->len; + } + else if (n < 0) { + rb_raise(rb_eArgError, "negative array size"); + } + result = ary_shared_array(rb_cArray, ary); + RARRAY(result)->len = n; + return result; +} + +static VALUE +ary_shared_last(argc, argv, ary) + int argc; + VALUE *argv; + VALUE ary; +{ + VALUE result = ary_shared_first(argc, argv, ary); + + RARRAY(result)->ptr += RARRAY(ary)->len - RARRAY(result)->len; + 
return result; +} + +/* + * call-seq: + * array << obj -> array + * + * Append---Pushes the given object on to the end of this array. This + * expression returns the array itself, so several appends + * may be chained together. + * + * [ 1, 2 ] << "c" << "d" << [ 3, 4 ] + * #=> [ 1, 2, "c", "d", [ 3, 4 ] ] + * + */ + +VALUE +rb_ary_push(ary, item) + VALUE ary; + VALUE item; +{ + rb_ary_store(ary, RARRAY(ary)->len, item); + return ary; +} + +/* + * call-seq: + * array.push(obj, ... ) -> array + * + * Append---Pushes the given object(s) on to the end of this array. This + * expression returns the array itself, so several appends + * may be chained together. + * + * a = [ "a", "b", "c" ] + * a.push("d", "e", "f") + * #=> ["a", "b", "c", "d", "e", "f"] + */ + +static VALUE +rb_ary_push_m(argc, argv, ary) + int argc; + VALUE *argv; + VALUE ary; +{ + while (argc--) { + rb_ary_push(ary, *argv++); + } + return ary; +} + +VALUE +rb_ary_pop(ary) + VALUE ary; +{ + rb_ary_modify_check(ary); + if (RARRAY(ary)->len == 0) return Qnil; + if (!FL_TEST(ary, ELTS_SHARED) && + RARRAY(ary)->len * 2 < RARRAY(ary)->aux.capa && + RARRAY(ary)->aux.capa > ARY_DEFAULT_SIZE) { + RARRAY(ary)->aux.capa = RARRAY(ary)->len * 2; + REALLOC_N(RARRAY(ary)->ptr, VALUE, RARRAY(ary)->aux.capa); + } + return RARRAY(ary)->ptr[--RARRAY(ary)->len]; +} + +/* + * call-seq: + * array.pop -> obj or nil + * + * Removes the last element from <i>self</i> and returns it, or + * <code>nil</code> if the array is empty. 
+ * + * a = [ "a", "b", "c", "d" ] + * a.pop #=> "d" + * a.pop(2) #=> ["b", "c"] + * a #=> ["a"] + */ + +static VALUE +rb_ary_pop_m(argc, argv, ary) + int argc; + VALUE *argv; + VALUE ary; +{ + VALUE result; + + if (argc == 0) { + return rb_ary_pop(ary); + } + + rb_ary_modify_check(ary); + + result = ary_shared_last(argc, argv, ary); + RARRAY(ary)->len -= RARRAY(result)->len; + return result; +} + +VALUE +rb_ary_shift(ary) + VALUE ary; +{ + VALUE top; + + rb_ary_modify_check(ary); + if (RARRAY(ary)->len == 0) return Qnil; + top = RARRAY(ary)->ptr[0]; + ary_make_shared(ary); + RARRAY(ary)->ptr++; /* shift ptr */ + RARRAY(ary)->len--; + + return top; +} + +/* + * call-seq: + * array.shift -> obj or nil + * + * Returns the first element of <i>self</i> and removes it (shifting all + * other elements down by one). Returns <code>nil</code> if the array + * is empty. + * + * args = [ "-m", "-q", "filename" ] + * args.shift #=> "-m" + * args #=> ["-q", "filename"] + * + * args = [ "-m", "-q", "filename" ] + * args.shift(2) #=> ["-m", "-q"] + * args #=> ["filename"] + */ + +static VALUE +rb_ary_shift_m(argc, argv, ary) + int argc; + VALUE *argv; + VALUE ary; +{ + VALUE result; + long n; + + if (argc == 0) { + return rb_ary_shift(ary); + } + + rb_ary_modify_check(ary); + + result = ary_shared_first(argc, argv, ary); + n = RARRAY(result)->len; + RARRAY(ary)->ptr += n; + RARRAY(ary)->len -= n; + + return result; +} + +VALUE +rb_ary_unshift(ary, item) + VALUE ary, item; +{ + rb_ary_modify(ary); + if (RARRAY(ary)->len == RARRAY(ary)->aux.capa) { + long capa_inc = RARRAY(ary)->aux.capa / 2; + if (capa_inc < ARY_DEFAULT_SIZE) { + capa_inc = ARY_DEFAULT_SIZE; + } + RARRAY(ary)->aux.capa += capa_inc; + REALLOC_N(RARRAY(ary)->ptr, VALUE, RARRAY(ary)->aux.capa); + } + + /* sliding items */ + MEMMOVE(RARRAY(ary)->ptr + 1, RARRAY(ary)->ptr, VALUE, RARRAY(ary)->len); + + RARRAY(ary)->len++; + RARRAY(ary)->ptr[0] = item; + + return ary; +} + +/* + * call-seq: + * array.unshift(obj, ...) 
-> array + * + * Prepends objects to the front of <i>array</i>. + * other elements up one. + * + * a = [ "b", "c", "d" ] + * a.unshift("a") #=> ["a", "b", "c", "d"] + * a.unshift(1, 2) #=> [ 1, 2, "a", "b", "c", "d"] + */ + +static VALUE +rb_ary_unshift_m(argc, argv, ary) + int argc; + VALUE *argv; + VALUE ary; +{ + long len = RARRAY(ary)->len; + + if (argc == 0) return ary; + + /* make rooms by setting the last item */ + rb_ary_store(ary, len + argc - 1, Qnil); + + /* sliding items */ + MEMMOVE(RARRAY(ary)->ptr + argc, RARRAY(ary)->ptr, VALUE, len); + MEMCPY(RARRAY(ary)->ptr, argv, VALUE, argc); + + return ary; +} + +/* faster version - use this if you don't need to treat negative offset */ +static inline VALUE +rb_ary_elt(ary, offset) + VALUE ary; + long offset; +{ + if (RARRAY(ary)->len == 0) return Qnil; + if (offset < 0 || RARRAY(ary)->len <= offset) { + return Qnil; + } + return RARRAY(ary)->ptr[offset]; +} + +VALUE +rb_ary_entry(ary, offset) + VALUE ary; + long offset; +{ + if (offset < 0) { + offset += RARRAY(ary)->len; + } + return rb_ary_elt(ary, offset); +} + +static VALUE +rb_ary_subseq(ary, beg, len) + VALUE ary; + long beg, len; +{ + VALUE klass, ary2, shared; + VALUE *ptr; + + if (beg > RARRAY(ary)->len) return Qnil; + if (beg < 0 || len < 0) return Qnil; + + if (beg + len > RARRAY(ary)->len) { + len = RARRAY(ary)->len - beg; + if (len < 0) + len = 0; + } + klass = rb_obj_class(ary); + if (len == 0) return ary_new(klass, 0); + + shared = ary_make_shared(ary); + ptr = RARRAY(ary)->ptr; + ary2 = ary_alloc(klass); + RARRAY(ary2)->ptr = ptr + beg; + RARRAY(ary2)->len = len; + RARRAY(ary2)->aux.shared = shared; + FL_SET(ary2, ELTS_SHARED); + + return ary2; +} + +/* + * call-seq: + * array[index] -> obj or nil + * array[start, length] -> an_array or nil + * array[range] -> an_array or nil + * array.slice(index) -> obj or nil + * array.slice(start, length) -> an_array or nil + * array.slice(range) -> an_array or nil + * + * Element Reference---Returns the 
element at _index_, + * or returns a subarray starting at _start_ and + * continuing for _length_ elements, or returns a subarray + * specified by _range_. + * Negative indices count backward from the end of the + * array (-1 is the last element). Returns nil if the index + * (or starting index) are out of range. + * + * a = [ "a", "b", "c", "d", "e" ] + * a[2] + a[0] + a[1] #=> "cab" + * a[6] #=> nil + * a[1, 2] #=> [ "b", "c" ] + * a[1..3] #=> [ "b", "c", "d" ] + * a[4..7] #=> [ "e" ] + * a[6..10] #=> nil + * a[-3, 3] #=> [ "c", "d", "e" ] + * # special cases + * a[5] #=> nil + * a[5, 1] #=> [] + * a[5..10] #=> [] + * + */ + +VALUE +rb_ary_aref(argc, argv, ary) + int argc; + VALUE *argv; + VALUE ary; +{ + VALUE arg; + long beg, len; + + if (argc == 2) { + beg = NUM2LONG(argv[0]); + len = NUM2LONG(argv[1]); + if (beg < 0) { + beg += RARRAY(ary)->len; + } + return rb_ary_subseq(ary, beg, len); + } + if (argc != 1) { + rb_scan_args(argc, argv, "11", 0, 0); + } + arg = argv[0]; + /* special case - speeding up */ + if (FIXNUM_P(arg)) { + return rb_ary_entry(ary, FIX2LONG(arg)); + } + /* check if idx is Range */ + switch (rb_range_beg_len(arg, &beg, &len, RARRAY(ary)->len, 0)) { + case Qfalse: + break; + case Qnil: + return Qnil; + default: + return rb_ary_subseq(ary, beg, len); + } + return rb_ary_entry(ary, NUM2LONG(arg)); +} + +/* + * call-seq: + * array.at(index) -> obj or nil + * + * Returns the element at _index_. A + * negative index counts from the end of _self_. Returns +nil+ + * if the index is out of range. See also <code>Array#[]</code>. + * (<code>Array#at</code> is slightly faster than <code>Array#[]</code>, + * as it does not accept ranges and so on.) 
+ * + * a = [ "a", "b", "c", "d", "e" ] + * a.at(0) #=> "a" + * a.at(-1) #=> "e" + */ + +static VALUE +rb_ary_at(ary, pos) + VALUE ary, pos; +{ + return rb_ary_entry(ary, NUM2LONG(pos)); +} + +/* + * call-seq: + * array.first -> obj or nil + * array.first(n) -> an_array + * + * Returns the first element of the array. If the array is empty, + * returns <code>nil</code>. + * + * a = [ "q", "r", "s", "t" ] + * a.first #=> "q" + * a.first(2) #=> ["q", "r"] + */ + +static VALUE +rb_ary_first(argc, argv, ary) + int argc; + VALUE *argv; + VALUE ary; +{ + if (argc == 0) { + if (RARRAY(ary)->len == 0) return Qnil; + return RARRAY(ary)->ptr[0]; + } + else { + return ary_shared_first(argc, argv, ary); + } +} + +/* + * call-seq: + * array.last -> obj or nil + * array.last(n) -> an_array + * + * Returns the last element(s) of <i>self</i>. If the array is empty, + * the first form returns <code>nil</code>. + * + * a = [ "w", "x", "y", "z" ] + * a.last #=> "z" + * a.last(2) #=> ["y", "z"] + */ + +static VALUE +rb_ary_last(argc, argv, ary) + int argc; + VALUE *argv; + VALUE ary; +{ + if (argc == 0) { + if (RARRAY(ary)->len == 0) return Qnil; + return RARRAY(ary)->ptr[RARRAY(ary)->len-1]; + } + else { + return ary_shared_last(argc, argv, ary); + } +} + +/* + * call-seq: + * array.fetch(index) -> obj + * array.fetch(index, default ) -> obj + * array.fetch(index) {|index| block } -> obj + * + * Tries to return the element at position <i>index</i>. If the index + * lies outside the array, the first form throws an + * <code>IndexError</code> exception, the second form returns + * <i>default</i>, and the third form returns the value of invoking + * the block, passing in the index. Negative values of <i>index</i> + * count from the end of the array. 
+ * + * a = [ 11, 22, 33, 44 ] + * a.fetch(1) #=> 22 + * a.fetch(-1) #=> 44 + * a.fetch(4, 'cat') #=> "cat" + * a.fetch(4) { |i| i*i } #=> 16 + */ + +static VALUE +rb_ary_fetch(argc, argv, ary) + int argc; + VALUE *argv; + VALUE ary; +{ + VALUE pos, ifnone; + long block_given; + long idx; + + rb_scan_args(argc, argv, "11", &pos, &ifnone); + block_given = rb_block_given_p(); + if (block_given && argc == 2) { + rb_warn("block supersedes default value argument"); + } + idx = NUM2LONG(pos); + + if (idx < 0) { + idx += RARRAY(ary)->len; + } + if (idx < 0 || RARRAY(ary)->len <= idx) { + if (block_given) return rb_yield(pos); + if (argc == 1) { + rb_raise(rb_eIndexError, "index %ld out of array", idx); + } + return ifnone; + } + return RARRAY(ary)->ptr[idx]; +} + +/* + * call-seq: + * array.index(obj) -> int or nil + * array.index {|item| block} -> int or nil + * + * Returns the index of the first object in <i>self</i> such that is + * <code>==</code> to <i>obj</i>. If a block is given instead of an + * argument, returns first object for which <em>block</em> is true. + * Returns <code>nil</code> if no match is found. + * + * a = [ "a", "b", "c" ] + * a.index("b") #=> 1 + * a.index("z") #=> nil + * a.index{|x|x=="b"} #=> 1 + */ + +static VALUE +rb_ary_index(argc, argv, ary) + int argc; + VALUE *argv; + VALUE ary; +{ + VALUE val; + long i; + + if (rb_scan_args(argc, argv, "01", &val) == 0) { + for (i=0; i<RARRAY(ary)->len; i++) { + if (RTEST(rb_yield(RARRAY(ary)->ptr[i]))) { + return LONG2NUM(i); + } + } + } + else { + for (i=0; i<RARRAY(ary)->len; i++) { + if (rb_equal(RARRAY(ary)->ptr[i], val)) + return LONG2NUM(i); + } + } + return Qnil; +} + +/* + * call-seq: + * array.rindex(obj) -> int or nil + * + * Returns the index of the last object in <i>array</i> + * <code>==</code> to <i>obj</i>. If a block is given instead of an + * argument, returns first object for which <em>block</em> is + * true. Returns <code>nil</code> if no match is found. 
+ * + * a = [ "a", "b", "b", "b", "c" ] + * a.rindex("b") #=> 3 + * a.rindex("z") #=> nil + * a.rindex{|x|x=="b"} #=> 3 + */ + +static VALUE +rb_ary_rindex(argc, argv, ary) + int argc; + VALUE *argv; + VALUE ary; +{ + VALUE val; + long i = RARRAY(ary)->len; + + if (rb_scan_args(argc, argv, "01", &val) == 0) { + while (i--) { + if (RTEST(rb_yield(RARRAY(ary)->ptr[i]))) + return LONG2NUM(i); + if (i > RARRAY(ary)->len) { + i = RARRAY(ary)->len; + } + } + } + else { + while (i--) { + if (rb_equal(RARRAY(ary)->ptr[i], val)) + return LONG2NUM(i); + if (i > RARRAY(ary)->len) { + i = RARRAY(ary)->len; + } + } + } + return Qnil; +} + +VALUE +rb_ary_to_ary(obj) + VALUE obj; +{ + if (TYPE(obj) == T_ARRAY) { + return obj; + } + if (rb_respond_to(obj, rb_intern("to_ary"))) { + return to_ary(obj); + } + return rb_ary_new3(1, obj); +} + +static void +rb_ary_splice(ary, beg, len, rpl) + VALUE ary; + long beg, len; + VALUE rpl; +{ + long rlen; + + if (len < 0) rb_raise(rb_eIndexError, "negative length (%ld)", len); + if (beg < 0) { + beg += RARRAY(ary)->len; + if (beg < 0) { + beg -= RARRAY(ary)->len; + rb_raise(rb_eIndexError, "index %ld out of array", beg); + } + } + if (beg + len > RARRAY(ary)->len) { + len = RARRAY(ary)->len - beg; + } + + if (rpl == Qundef) { + rlen = 0; + } + else { + rpl = rb_ary_to_ary(rpl); + rlen = RARRAY(rpl)->len; + } + rb_ary_modify(ary); + + if (beg >= RARRAY(ary)->len) { + len = beg + rlen; + if (len >= RARRAY(ary)->aux.capa) { + REALLOC_N(RARRAY(ary)->ptr, VALUE, len); + RARRAY(ary)->aux.capa = len; + } + rb_mem_clear(RARRAY(ary)->ptr + RARRAY(ary)->len, beg - RARRAY(ary)->len); + if (rlen > 0) { + MEMCPY(RARRAY(ary)->ptr + beg, RARRAY(rpl)->ptr, VALUE, rlen); + } + RARRAY(ary)->len = len; + } + else { + long alen; + + if (beg + len > RARRAY(ary)->len) { + len = RARRAY(ary)->len - beg; + } + + alen = RARRAY(ary)->len + rlen - len; + if (alen >= RARRAY(ary)->aux.capa) { + REALLOC_N(RARRAY(ary)->ptr, VALUE, alen); + RARRAY(ary)->aux.capa = alen; + } 
+ + if (len != rlen) { + MEMMOVE(RARRAY(ary)->ptr + beg + rlen, RARRAY(ary)->ptr + beg + len, + VALUE, RARRAY(ary)->len - (beg + len)); + RARRAY(ary)->len = alen; + } + if (rlen > 0) { + MEMMOVE(RARRAY(ary)->ptr + beg, RARRAY(rpl)->ptr, VALUE, rlen); + } + } +} + +/* + * call-seq: + * array[index] = obj -> obj + * array[start, length] = obj or an_array or nil -> obj or an_array or nil + * array[range] = obj or an_array or nil -> obj or an_array or nil + * + * Element Assignment---Sets the element at _index_, + * or replaces a subarray starting at _start_ and + * continuing for _length_ elements, or replaces a subarray + * specified by _range_. If indices are greater than + * the current capacity of the array, the array grows + * automatically. A negative indices will count backward + * from the end of the array. Inserts elements if _length_ is + * zero. An +IndexError+ is raised if a negative index points + * past the beginning of the array. See also + * <code>Array#push</code>, and <code>Array#unshift</code>. + * + * a = Array.new + * a[4] = "4"; #=> [nil, nil, nil, nil, "4"] + * a[0, 3] = [ 'a', 'b', 'c' ] #=> ["a", "b", "c", nil, "4"] + * a[1..2] = [ 1, 2 ] #=> ["a", 1, 2, nil, "4"] + * a[0, 2] = "?" 
#=> ["?", 2, nil, "4"] + * a[0..2] = "A" #=> ["A", "4"] + * a[-1] = "Z" #=> ["A", "Z"] + * a[1..-1] = nil #=> ["A", nil] + * a[1..-1] = [] #=> ["A"] + */ + +static VALUE +rb_ary_aset(argc, argv, ary) + int argc; + VALUE *argv; + VALUE ary; +{ + long offset, beg, len; + + if (argc == 3) { + rb_ary_splice(ary, NUM2LONG(argv[0]), NUM2LONG(argv[1]), argv[2]); + return argv[2]; + } + if (argc != 2) { + rb_raise(rb_eArgError, "wrong number of arguments (%d for 2)", argc); + } + if (FIXNUM_P(argv[0])) { + offset = FIX2LONG(argv[0]); + goto fixnum; + } + if (rb_range_beg_len(argv[0], &beg, &len, RARRAY(ary)->len, 1)) { + /* check if idx is Range */ + rb_ary_splice(ary, beg, len, argv[1]); + return argv[1]; + } + + offset = NUM2LONG(argv[0]); +fixnum: + rb_ary_store(ary, offset, argv[1]); + return argv[1]; +} + +/* + * call-seq: + * array.insert(index, obj...) -> array + * + * Inserts the given values before the element with the given index + * (which may be negative). + * + * a = %w{ a b c d } + * a.insert(2, 99) #=> ["a", "b", 99, "c", "d"] + * a.insert(-2, 1, 2, 3) #=> ["a", "b", 99, "c", 1, 2, 3, "d"] + */ + +static VALUE +rb_ary_insert(argc, argv, ary) + int argc; + VALUE *argv; + VALUE ary; +{ + long pos; + + if (argc < 1) { + rb_raise(rb_eArgError, "wrong number of arguments (at least 1)"); + } + pos = NUM2LONG(argv[0]); + if (pos == -1) { + pos = RARRAY(ary)->len; + } + else if (pos < 0) { + pos++; + } + + if (argc == 1) return ary; + rb_ary_splice(ary, pos, 0, rb_ary_new4(argc - 1, argv + 1)); + return ary; +} + +/* + * call-seq: + * array.each {|item| block } -> array + * + * Calls <i>block</i> once for each element in <i>self</i>, passing that + * element as a parameter. 
+ * + * a = [ "a", "b", "c" ] + * a.each {|x| print x, " -- " } + * + * produces: + * + * a -- b -- c -- + */ + +VALUE +rb_ary_each(ary) + VALUE ary; +{ + long i; + + for (i=0; i<RARRAY(ary)->len; i++) { + rb_yield(RARRAY(ary)->ptr[i]); + } + return ary; +} + +/* + * call-seq: + * array.each_index {|index| block } -> array + * + * Same as <code>Array#each</code>, but passes the index of the element + * instead of the element itself. + * + * a = [ "a", "b", "c" ] + * a.each_index {|x| print x, " -- " } + * + * produces: + * + * 0 -- 1 -- 2 -- + */ + +static VALUE +rb_ary_each_index(ary) + VALUE ary; +{ + long i; + + for (i=0; i<RARRAY(ary)->len; i++) { + rb_yield(LONG2NUM(i)); + } + return ary; +} + +/* + * call-seq: + * array.reverse_each {|item| block } + * + * Same as <code>Array#each</code>, but traverses <i>self</i> in reverse + * order. + * + * a = [ "a", "b", "c" ] + * a.reverse_each {|x| print x, " " } + * + * produces: + * + * c b a + */ + +static VALUE +rb_ary_reverse_each(ary) + VALUE ary; +{ + long len = RARRAY(ary)->len; + + while (len--) { + rb_yield(RARRAY(ary)->ptr[len]); + if (RARRAY(ary)->len < len) { + len = RARRAY(ary)->len; + } + } + return ary; +} + +/* + * call-seq: + * array.length -> int + * + * Returns the number of elements in <i>self</i>. May be zero. + * + * [ 1, 2, 3, 4, 5 ].length #=> 5 + */ + +static VALUE +rb_ary_length(ary) + VALUE ary; +{ + return LONG2NUM(RARRAY(ary)->len); +} + +/* + * call-seq: + * array.empty? -> true or false + * + * Returns <code>true</code> if <i>self</i> array contains no elements. + * + * [].empty? 
#=> true + */ + +static VALUE +rb_ary_empty_p(ary) + VALUE ary; +{ + if (RARRAY(ary)->len == 0) + return Qtrue; + return Qfalse; +} + +VALUE +rb_ary_dup(ary) + VALUE ary; +{ + VALUE dup = rb_ary_new2(RARRAY(ary)->len); + + DUPSETUP(dup, ary); + MEMCPY(RARRAY(dup)->ptr, RARRAY(ary)->ptr, VALUE, RARRAY(ary)->len); + RARRAY(dup)->len = RARRAY(ary)->len; + return dup; +} + +extern VALUE rb_output_fs; + +static VALUE +recursive_join(ary, arg, recur) + VALUE ary; + VALUE *arg; + int recur; +{ + if (recur) { + return rb_str_new2("[...]"); + } + return rb_ary_join(arg[0], arg[1]); +} + +VALUE +rb_ary_join(ary, sep) + VALUE ary, sep; +{ + long len = 1, i; + int taint = Qfalse; + VALUE result, tmp; + + if (RARRAY(ary)->len == 0) return rb_str_new(0, 0); + if (OBJ_TAINTED(ary) || OBJ_TAINTED(sep)) taint = Qtrue; + + for (i=0; i<RARRAY(ary)->len; i++) { + tmp = rb_check_string_type(RARRAY(ary)->ptr[i]); + len += NIL_P(tmp) ? 10 : RSTRING(tmp)->len; + } + if (!NIL_P(sep)) { + StringValue(sep); + len += RSTRING(sep)->len * (RARRAY(ary)->len - 1); + } + result = rb_str_buf_new(len); + for (i=0; i<RARRAY(ary)->len; i++) { + tmp = RARRAY(ary)->ptr[i]; + switch (TYPE(tmp)) { + case T_STRING: + break; + case T_ARRAY: + { + VALUE args[2]; + + args[0] = tmp; + args[1] = sep; + tmp = rb_exec_recursive(recursive_join, ary, (VALUE)args); + } + break; + default: + tmp = rb_obj_as_string(tmp); + } + if (i > 0 && !NIL_P(sep)) + rb_str_buf_append(result, sep); + rb_str_buf_append(result, tmp); + if (OBJ_TAINTED(tmp)) taint = Qtrue; + } + + if (taint) OBJ_TAINT(result); + return result; +} + +/* + * call-seq: + * array.join(sep=$,) -> str + * + * Returns a string created by converting each element of the array to + * a string, separated by <i>sep</i>. 
+ * + * [ "a", "b", "c" ].join #=> "abc" + * [ "a", "b", "c" ].join("-") #=> "a-b-c" + */ + +static VALUE +rb_ary_join_m(argc, argv, ary) + int argc; + VALUE *argv; + VALUE ary; +{ + VALUE sep; + + rb_scan_args(argc, argv, "01", &sep); + if (NIL_P(sep)) sep = rb_output_fs; + + return rb_ary_join(ary, sep); +} + +/* + * call-seq: + * array.to_s -> string + * + * Returns _self_<code>.join</code>. + * + * [ "a", "e", "i", "o" ].to_s #=> "aeio" + * + */ + +VALUE +rb_ary_to_s(ary) + VALUE ary; +{ + if (RARRAY(ary)->len == 0) return rb_str_new(0, 0); + + return rb_ary_join(ary, rb_output_fs); +} + +static VALUE +inspect_ary(ary, dummy, recur) + VALUE ary; + VALUE dummy; + int recur; +{ + int tainted = OBJ_TAINTED(ary); + long i; + VALUE s, str; + + if (recur) return rb_tainted_str_new2("[...]"); + str = rb_str_buf_new2("["); + for (i=0; i<RARRAY(ary)->len; i++) { + s = rb_inspect(RARRAY(ary)->ptr[i]); + if (OBJ_TAINTED(s)) tainted = Qtrue; + if (i > 0) rb_str_buf_cat2(str, ", "); + rb_str_buf_append(str, s); + } + rb_str_buf_cat2(str, "]"); + if (tainted) OBJ_TAINT(str); + return str; +} + +/* + * call-seq: + * array.inspect -> string + * + * Create a printable version of <i>array</i>. + */ + +static VALUE +rb_ary_inspect(ary) + VALUE ary; +{ + if (RARRAY(ary)->len == 0) return rb_str_new2("[]"); + return rb_exec_recursive(inspect_ary, ary, 0); +} + +/* + * call-seq: + * array.to_a -> array + * + * Returns _self_. If called on a subclass of Array, converts + * the receiver to an Array object. + */ + +static VALUE +rb_ary_to_a(ary) + VALUE ary; +{ + if (rb_obj_class(ary) != rb_cArray) { + VALUE dup = rb_ary_new2(RARRAY(ary)->len); + rb_ary_replace(dup, ary); + return dup; + } + return ary; +} + +/* + * call-seq: + * array.to_ary -> array + * + * Returns _self_. 
+ */ + +static VALUE +rb_ary_to_ary_m(ary) + VALUE ary; +{ + return ary; +} + +VALUE +rb_ary_reverse(ary) + VALUE ary; +{ + VALUE *p1, *p2; + VALUE tmp; + + rb_ary_modify(ary); + if (RARRAY(ary)->len > 1) { + p1 = RARRAY(ary)->ptr; + p2 = p1 + RARRAY(ary)->len - 1; /* points last item */ + + while (p1 < p2) { + tmp = *p1; + *p1++ = *p2; + *p2-- = tmp; + } + } + return ary; +} + +/* + * call-seq: + * array.reverse! -> array + * + * Reverses _self_ in place. + * + * a = [ "a", "b", "c" ] + * a.reverse! #=> ["c", "b", "a"] + * a #=> ["c", "b", "a"] + */ + +static VALUE +rb_ary_reverse_bang(ary) + VALUE ary; +{ + return rb_ary_reverse(ary); +} + +/* + * call-seq: + * array.reverse -> an_array + * + * Returns a new array containing <i>self</i>'s elements in reverse order. + * + * [ "a", "b", "c" ].reverse #=> ["c", "b", "a"] + * [ 1 ].reverse #=> [1] + */ + +static VALUE +rb_ary_reverse_m(ary) + VALUE ary; +{ + return rb_ary_reverse(rb_ary_dup(ary)); +} + +struct ary_sort_data { + VALUE ary; + VALUE *ptr; + long len; +}; + +static void +ary_sort_check(data) + struct ary_sort_data *data; +{ + if (RARRAY(data->ary)->ptr != data->ptr || RARRAY(data->ary)->len != data->len) { + rb_raise(rb_eRuntimeError, "array modified during sort"); + } +} + +static int +sort_1(a, b, data) + VALUE *a, *b; + struct ary_sort_data *data; +{ + VALUE retval = rb_yield_values(2, *a, *b); + int n; + + n = rb_cmpint(retval, *a, *b); + ary_sort_check(data); + return n; +} + +static int +sort_2(ap, bp, data) + VALUE *ap, *bp; + struct ary_sort_data *data; +{ + VALUE retval; + VALUE a = *ap, b = *bp; + int n; + + if (FIXNUM_P(a) && FIXNUM_P(b)) { + if ((long)a > (long)b) return 1; + if ((long)a < (long)b) return -1; + return 0; + } + if (TYPE(a) == T_STRING && TYPE(b) == T_STRING) { + return rb_str_cmp(a, b); + } + + retval = rb_funcall(a, id_cmp, 1, b); + n = rb_cmpint(retval, a, b); + ary_sort_check(data); + + return n; +} + +static VALUE +sort_internal(ary) + VALUE ary; +{ + struct ary_sort_data 
data; + + data.ary = ary; + data.ptr = RARRAY(ary)->ptr; data.len = RARRAY(ary)->len; + qsort(RARRAY(ary)->ptr, RARRAY(ary)->len, sizeof(VALUE), + rb_block_given_p()?sort_1:sort_2, &data); + return ary; +} + +static VALUE +sort_unlock(ary) + VALUE ary; +{ + FL_UNSET(ary, ARY_TMPLOCK); + return ary; +} + +/* + * call-seq: + * array.sort! -> array + * array.sort! {| a,b | block } -> array + * + * Sorts _self_. Comparisons for + * the sort will be done using the <code><=></code> operator or using + * an optional code block. The block implements a comparison between + * <i>a</i> and <i>b</i>, returning -1, 0, or +1. See also + * <code>Enumerable#sort_by</code>. + * + * a = [ "d", "a", "e", "c", "b" ] + * a.sort #=> ["a", "b", "c", "d", "e"] + * a.sort {|x,y| y <=> x } #=> ["e", "d", "c", "b", "a"] + */ + +VALUE +rb_ary_sort_bang(ary) + VALUE ary; +{ + rb_ary_modify(ary); + if (RARRAY(ary)->len > 1) { + FL_SET(ary, ARY_TMPLOCK); /* prohibit modification during sort */ + rb_ensure(sort_internal, ary, sort_unlock, ary); + } + return ary; +} + +/* + * call-seq: + * array.sort -> an_array + * array.sort {| a,b | block } -> an_array + * + * Returns a new array created by sorting <i>self</i>. Comparisons for + * the sort will be done using the <code><=></code> operator or using + * an optional code block. The block implements a comparison between + * <i>a</i> and <i>b</i>, returning -1, 0, or +1. See also + * <code>Enumerable#sort_by</code>. + * + * a = [ "d", "a", "e", "c", "b" ] + * a.sort #=> ["a", "b", "c", "d", "e"] + * a.sort {|x,y| y <=> x } #=> ["e", "d", "c", "b", "a"] + */ + +VALUE +rb_ary_sort(ary) + VALUE ary; +{ + ary = rb_ary_dup(ary); + rb_ary_sort_bang(ary); + return ary; +} + +/* + * call-seq: + * array.collect {|item| block } -> an_array + * array.map {|item| block } -> an_array + * + * Invokes <i>block</i> once for each element of <i>self</i>. Creates a + * new array containing the values returned by the block. + * See also <code>Enumerable#collect</code>. 
+ * + * a = [ "a", "b", "c", "d" ] + * a.collect {|x| x + "!" } #=> ["a!", "b!", "c!", "d!"] + * a #=> ["a", "b", "c", "d"] + */ + +static VALUE +rb_ary_collect(ary) + VALUE ary; +{ + long i; + VALUE collect; + + if (!rb_block_given_p()) { + return rb_ary_new4(RARRAY(ary)->len, RARRAY(ary)->ptr); + } + + collect = rb_ary_new2(RARRAY(ary)->len); + for (i = 0; i < RARRAY(ary)->len; i++) { + rb_ary_push(collect, rb_yield(RARRAY(ary)->ptr[i])); + } + return collect; +} + +/* + * call-seq: + * array.collect! {|item| block } -> array + * array.map! {|item| block } -> array + * + * Invokes the block once for each element of _self_, replacing the + * element with the value returned by _block_. + * See also <code>Enumerable#collect</code>. + * + * a = [ "a", "b", "c", "d" ] + * a.collect! {|x| x + "!" } + * a #=> [ "a!", "b!", "c!", "d!" ] + */ + +static VALUE +rb_ary_collect_bang(ary) + VALUE ary; +{ + long i; + + rb_ary_modify(ary); + for (i = 0; i < RARRAY(ary)->len; i++) { + rb_ary_store(ary, i, rb_yield(RARRAY(ary)->ptr[i])); + } + return ary; +} + +VALUE +rb_get_values_at(obj, olen, argc, argv, func) + VALUE obj; + long olen; + int argc; + VALUE *argv; + VALUE (*func) _((VALUE,long)); +{ + VALUE result = rb_ary_new2(argc); + long beg, len, i, j; + + for (i=0; i<argc; i++) { + if (FIXNUM_P(argv[i])) { + rb_ary_push(result, (*func)(obj, FIX2LONG(argv[i]))); + continue; + } + /* check if idx is Range */ + switch (rb_range_beg_len(argv[i], &beg, &len, olen, 0)) { + case Qfalse: + break; + case Qnil: + continue; + default: + for (j=0; j<len; j++) { + rb_ary_push(result, (*func)(obj, j+beg)); + } + continue; + } + rb_ary_push(result, (*func)(obj, NUM2LONG(argv[i]))); + } + return result; +} + +/* + * call-seq: + * array.values_at(selector,... ) -> an_array + * + * Returns an array containing the elements in + * _self_ corresponding to the given selector(s). The selectors + * may be either integer indices or ranges. + * See also <code>Array#select</code>. 
+ * + * a = %w{ a b c d e f } + * a.values_at(1, 3, 5) + * a.values_at(1, 3, 5, 7) + * a.values_at(-1, -3, -5, -7) + * a.values_at(1..3, 2...5) + */ + +static VALUE +rb_ary_values_at(argc, argv, ary) + int argc; + VALUE *argv; + VALUE ary; +{ + return rb_get_values_at(ary, RARRAY(ary)->len, argc, argv, rb_ary_entry); +} + +/* + * call-seq: + * array.select {|item| block } -> an_array + * + * Invokes the block passing in successive elements from <i>array</i>, + * returning an array containing those elements for which the block + * returns a true value (equivalent to <code>Enumerable#select</code>). + * + * a = %w{ a b c d e f } + * a.select {|v| v =~ /[aeiou]/} #=> ["a", "e"] + */ + +static VALUE +rb_ary_select(ary) + VALUE ary; +{ + VALUE result; + long i; + + result = rb_ary_new2(RARRAY(ary)->len); + for (i = 0; i < RARRAY(ary)->len; i++) { + if (RTEST(rb_yield(RARRAY(ary)->ptr[i]))) { + rb_ary_push(result, rb_ary_elt(ary, i)); + } + } + return result; +} + +/* + * call-seq: + * array.delete(obj) -> obj or nil + * array.delete(obj) { block } -> obj or nil + * + * Deletes items from <i>self</i> that are equal to <i>obj</i>. If + * the item is not found, returns <code>nil</code>. If the optional + * code block is given, returns the result of <i>block</i> if the item + * is not found. 
+ * + * a = [ "a", "b", "b", "b", "c" ] + * a.delete("b") #=> "b" + * a #=> ["a", "c"] + * a.delete("z") #=> nil + * a.delete("z") { "not found" } #=> "not found" + */ + +VALUE +rb_ary_delete(ary, item) + VALUE ary; + VALUE item; +{ + long i1, i2; + + for (i1 = i2 = 0; i1 < RARRAY(ary)->len; i1++) { + VALUE e = RARRAY(ary)->ptr[i1]; + + if (rb_equal(e, item)) continue; + if (i1 != i2) { + rb_ary_store(ary, i2, e); + } + i2++; + } + if (RARRAY(ary)->len == i2) { + if (rb_block_given_p()) { + return rb_yield(item); + } + return Qnil; + } + + rb_ary_modify(ary); + if (RARRAY(ary)->len > i2) { + RARRAY(ary)->len = i2; + if (i2 * 2 < RARRAY(ary)->aux.capa && + RARRAY(ary)->aux.capa > ARY_DEFAULT_SIZE) { + REALLOC_N(RARRAY(ary)->ptr, VALUE, i2 * 2); + RARRAY(ary)->aux.capa = i2 * 2; + } + } + + return item; +} + +VALUE +rb_ary_delete_at(ary, pos) + VALUE ary; + long pos; +{ + long i, len = RARRAY(ary)->len; + VALUE del; + + if (pos >= len) return Qnil; + if (pos < 0) { + pos += len; + if (pos < 0) return Qnil; + } + + rb_ary_modify(ary); + del = RARRAY(ary)->ptr[pos]; + for (i = pos + 1; i < len; i++, pos++) { + RARRAY(ary)->ptr[pos] = RARRAY(ary)->ptr[i]; + } + RARRAY(ary)->len = pos; + + return del; +} + +/* + * call-seq: + * array.delete_at(index) -> obj or nil + * + * Deletes the element at the specified index, returning that element, + * or <code>nil</code> if the index is out of range. See also + * <code>Array#slice!</code>. + * + * a = %w( ant bat cat dog ) + * a.delete_at(2) #=> "cat" + * a #=> ["ant", "bat", "dog"] + * a.delete_at(99) #=> nil + */ + +static VALUE +rb_ary_delete_at_m(ary, pos) + VALUE ary, pos; +{ + return rb_ary_delete_at(ary, NUM2LONG(pos)); +} + +/* + * call-seq: + * array.slice!(index) -> obj or nil + * array.slice!(start, length) -> sub_array or nil + * array.slice!(range) -> sub_array or nil + * + * Deletes the element(s) given by an index (optionally with a length) + * or by a range. 
Returns the deleted object, subarray, or + * <code>nil</code> if the index is out of range. Equivalent to: + * + * def slice!(*args) + * result = self[*args] + * self[*args] = nil + * result + * end + * + * a = [ "a", "b", "c" ] + * a.slice!(1) #=> "b" + * a #=> ["a", "c"] + * a.slice!(-1) #=> "c" + * a #=> ["a"] + * a.slice!(100) #=> nil + * a #=> ["a"] + */ + +static VALUE +rb_ary_slice_bang(argc, argv, ary) + int argc; + VALUE *argv; + VALUE ary; +{ + VALUE arg1, arg2; + long pos, len; + + if (rb_scan_args(argc, argv, "11", &arg1, &arg2) == 2) { + pos = NUM2LONG(arg1); + len = NUM2LONG(arg2); + delete_pos_len: + if (pos < 0) { + pos = RARRAY(ary)->len + pos; + } + arg2 = rb_ary_subseq(ary, pos, len); + rb_ary_splice(ary, pos, len, Qundef); /* Qnil/rb_ary_new2(0) */ + return arg2; + } + + if (!FIXNUM_P(arg1) && rb_range_beg_len(arg1, &pos, &len, RARRAY(ary)->len, 1)) { + goto delete_pos_len; + } + + return rb_ary_delete_at(ary, NUM2LONG(arg1)); +} + +/* + * call-seq: + * array.reject! {|item| block } -> array or nil + * + * Equivalent to <code>Array#delete_if</code>, deleting elements from + * _self_ for which the block evaluates to true, but returns + * <code>nil</code> if no changes were made. Also see + * <code>Enumerable#reject</code>. + */ + +static VALUE +rb_ary_reject_bang(ary) + VALUE ary; +{ + long i1, i2; + + rb_ary_modify(ary); + for (i1 = i2 = 0; i1 < RARRAY(ary)->len; i1++) { + VALUE v = RARRAY(ary)->ptr[i1]; + if (RTEST(rb_yield(v))) continue; + if (i1 != i2) { + rb_ary_store(ary, i2, v); + } + i2++; + } + if (RARRAY(ary)->len == i2) return Qnil; + if (i2 < RARRAY(ary)->len) + RARRAY(ary)->len = i2; + + return ary; +} + +/* + * call-seq: + * array.reject {|item| block } -> an_array + * + * Returns a new array containing the items in _self_ + * for which the block is not true. 
+ */ + +static VALUE +rb_ary_reject(ary) + VALUE ary; +{ + ary = rb_ary_dup(ary); + rb_ary_reject_bang(ary); + return ary; +} + +/* + * call-seq: + * array.delete_if {|item| block } -> array + * + * Deletes every element of <i>self</i> for which <i>block</i> evaluates + * to <code>true</code>. + * + * a = [ "a", "b", "c" ] + * a.delete_if {|x| x >= "b" } #=> ["a"] + */ + +static VALUE +rb_ary_delete_if(ary) + VALUE ary; +{ + rb_ary_reject_bang(ary); + return ary; +} + +/* + * call-seq: + * array.zip(arg, ...) -> an_array + * array.zip(arg, ...) {| arr | block } -> nil + * + * Converts any arguments to arrays, then merges elements of + * <i>self</i> with corresponding elements from each argument. This + * generates a sequence of <code>self.size</code> <em>n</em>-element + * arrays, where <em>n</em> is one more that the count of arguments. If + * the size of any argument is less than <code>enumObj.size</code>, + * <code>nil</code> values are supplied. If a block given, it is + * invoked for each output array, otherwise an array of arrays is + * returned. 
+ * + * a = [ 4, 5, 6 ] + * b = [ 7, 8, 9 ] + * + * [1,2,3].zip(a, b) #=> [[1, 4, 7], [2, 5, 8], [3, 6, 9]] + * [1,2].zip(a,b) #=> [[1, 4, 7], [2, 5, 8]] + * a.zip([1,2],[8]) #=> [[4,1,8], [5,2,nil], [6,nil,nil]] + */ + +static VALUE +rb_ary_zip(argc, argv, ary) + int argc; + VALUE *argv; + VALUE ary; +{ + int i, j; + long len; + VALUE result; + + for (i=0; i<argc; i++) { + argv[i] = to_a(argv[i]); + } + if (rb_block_given_p()) { + for (i=0; i<RARRAY(ary)->len; i++) { + VALUE tmp = rb_ary_new2(argc+1); + + rb_ary_push(tmp, rb_ary_elt(ary, i)); + for (j=0; j<argc; j++) { + rb_ary_push(tmp, rb_ary_elt(argv[j], i)); + } + rb_yield(tmp); + } + return Qnil; + } + len = RARRAY(ary)->len; + result = rb_ary_new2(len); + for (i=0; i<len; i++) { + VALUE tmp = rb_ary_new2(argc+1); + + rb_ary_push(tmp, rb_ary_elt(ary, i)); + for (j=0; j<argc; j++) { + rb_ary_push(tmp, rb_ary_elt(argv[j], i)); + } + rb_ary_push(result, tmp); + } + return result; +} + +/* + * call-seq: + * array.transpose -> an_array + * + * Assumes that <i>self</i> is an array of arrays and transposes the + * rows and columns. 
+ * + * a = [[1,2], [3,4], [5,6]] + * a.transpose #=> [[1, 3, 5], [2, 4, 6]] + */ + +static VALUE +rb_ary_transpose(ary) + VALUE ary; +{ + long elen = -1, alen, i, j; + VALUE tmp, result = 0; + + alen = RARRAY(ary)->len; + if (alen == 0) return rb_ary_dup(ary); + for (i=0; i<alen; i++) { + tmp = to_ary(rb_ary_elt(ary, i)); + if (elen < 0) { /* first element */ + elen = RARRAY(tmp)->len; + result = rb_ary_new2(elen); + for (j=0; j<elen; j++) { + rb_ary_store(result, j, rb_ary_new2(alen)); + } + } + else if (elen != RARRAY(tmp)->len) { + rb_raise(rb_eIndexError, "element size differs (%d should be %d)", + RARRAY(tmp)->len, elen); + } + for (j=0; j<elen; j++) { + rb_ary_store(rb_ary_elt(result, j), i, rb_ary_elt(tmp, j)); + } + } + return result; +} + +/* + * call-seq: + * array.replace(other_array) -> array + * + * Replaces the contents of <i>self</i> with the contents of + * <i>other_array</i>, truncating or expanding if necessary. + * + * a = [ "a", "b", "c", "d", "e" ] + * a.replace([ "x", "y", "z" ]) #=> ["x", "y", "z"] + * a #=> ["x", "y", "z"] + */ + +static VALUE +rb_ary_replace(copy, orig) + VALUE copy, orig; +{ + VALUE shared; + + rb_ary_modify(copy); + orig = to_ary(orig); + if (copy == orig) return copy; + shared = ary_make_shared(orig); + if (RARRAY(copy)->ptr && !FL_TEST(copy, ELTS_SHARED)) + free(RARRAY(copy)->ptr); + RARRAY(copy)->ptr = RARRAY(orig)->ptr; + RARRAY(copy)->len = RARRAY(orig)->len; + RARRAY(copy)->aux.shared = shared; + FL_SET(copy, ELTS_SHARED); + + return copy; +} + +/* + * call-seq: + * array.clear -> array + * + * Removes all elements from _self_. 
+ * + * a = [ "a", "b", "c", "d", "e" ] + * a.clear #=> [ ] + */ + +VALUE +rb_ary_clear(ary) + VALUE ary; +{ + rb_ary_modify(ary); + RARRAY(ary)->len = 0; + if (ARY_DEFAULT_SIZE * 2 < RARRAY(ary)->aux.capa) { + REALLOC_N(RARRAY(ary)->ptr, VALUE, ARY_DEFAULT_SIZE * 2); + RARRAY(ary)->aux.capa = ARY_DEFAULT_SIZE * 2; + } + return ary; +} + +/* + * call-seq: + * array.fill(obj) -> array + * array.fill(obj, start [, length]) -> array + * array.fill(obj, range ) -> array + * array.fill {|index| block } -> array + * array.fill(start [, length] ) {|index| block } -> array + * array.fill(range) {|index| block } -> array + * + * The first three forms set the selected elements of <i>self</i> (which + * may be the entire array) to <i>obj</i>. A <i>start</i> of + * <code>nil</code> is equivalent to zero. A <i>length</i> of + * <code>nil</code> is equivalent to <i>self.length</i>. The last three + * forms fill the array with the value of the block. The block is + * passed the absolute index of each element to be filled. + * + * a = [ "a", "b", "c", "d" ] + * a.fill("x") #=> ["x", "x", "x", "x"] + * a.fill("z", 2, 2) #=> ["x", "x", "z", "z"] + * a.fill("y", 0..1) #=> ["y", "y", "z", "z"] + * a.fill {|i| i*i} #=> [0, 1, 4, 9] + * a.fill(-2) {|i| i*i*i} #=> [0, 1, 8, 27] + */ + +static VALUE +rb_ary_fill(argc, argv, ary) + int argc; + VALUE *argv; + VALUE ary; +{ + VALUE item, arg1, arg2; + long beg, end, len; + VALUE *p, *pend; + int block_p = Qfalse; + + if (rb_block_given_p()) { + block_p = Qtrue; + rb_scan_args(argc, argv, "02", &arg1, &arg2); + argc += 1; /* hackish */ + } + else { + rb_scan_args(argc, argv, "12", &item, &arg1, &arg2); + } + switch (argc) { + case 1: + beg = 0; + len = RARRAY(ary)->len; + break; + case 2: + if (rb_range_beg_len(arg1, &beg, &len, RARRAY(ary)->len, 1)) { + break; + } + /* fall through */ + case 3: + beg = NIL_P(arg1) ? 0 : NUM2LONG(arg1); + if (beg < 0) { + beg = RARRAY(ary)->len + beg; + if (beg < 0) beg = 0; + } + len = NIL_P(arg2) ? 
RARRAY(ary)->len - beg : NUM2LONG(arg2); + break; + } + rb_ary_modify(ary); + end = beg + len; + if (end > RARRAY(ary)->len) { + if (end >= RARRAY(ary)->aux.capa) { + REALLOC_N(RARRAY(ary)->ptr, VALUE, end); + RARRAY(ary)->aux.capa = end; + } + if (beg > RARRAY(ary)->len) { + rb_mem_clear(RARRAY(ary)->ptr + RARRAY(ary)->len, end - RARRAY(ary)->len); + } + RARRAY(ary)->len = end; + } + + if (block_p) { + VALUE v; + long i; + + for (i=beg; i<end; i++) { + v = rb_yield(LONG2NUM(i)); + if (i>=RARRAY(ary)->len) break; + RARRAY(ary)->ptr[i] = v; + } + } + else { + p = RARRAY(ary)->ptr + beg; + pend = p + len; + while (p < pend) { + *p++ = item; + } + } + return ary; +} + +/* + * call-seq: + * array + other_array -> an_array + * + * Concatenation---Returns a new array built by concatenating the + * two arrays together to produce a third array. + * + * [ 1, 2, 3 ] + [ 4, 5 ] #=> [ 1, 2, 3, 4, 5 ] + */ + +VALUE +rb_ary_plus(x, y) + VALUE x, y; +{ + VALUE z; + long len; + + y = to_ary(y); + len = RARRAY(x)->len + RARRAY(y)->len; + z = rb_ary_new2(len); + MEMCPY(RARRAY(z)->ptr, RARRAY(x)->ptr, VALUE, RARRAY(x)->len); + MEMCPY(RARRAY(z)->ptr + RARRAY(x)->len, RARRAY(y)->ptr, VALUE, RARRAY(y)->len); + RARRAY(z)->len = len; + return z; +} + +/* + * call-seq: + * array.concat(other_array) -> array + * + * Appends the elements in other_array to _self_. + * + * [ "a", "b" ].concat( ["c", "d"] ) #=> [ "a", "b", "c", "d" ] + */ + + +VALUE +rb_ary_concat(x, y) + VALUE x, y; +{ + y = to_ary(y); + if (RARRAY(y)->len > 0) { + rb_ary_splice(x, RARRAY(x)->len, 0, y); + } + return x; +} + + +/* + * call-seq: + * array * int -> an_array + * array * str -> a_string + * + * Repetition---With a String argument, equivalent to + * self.join(str). Otherwise, returns a new array + * built by concatenating the _int_ copies of _self_. 
+ * + * + * [ 1, 2, 3 ] * 3 #=> [ 1, 2, 3, 1, 2, 3, 1, 2, 3 ] + * [ 1, 2, 3 ] * "," #=> "1,2,3" + * + */ + +static VALUE +rb_ary_times(ary, times) + VALUE ary, times; +{ + VALUE ary2, tmp; + long i, len; + + tmp = rb_check_string_type(times); + if (!NIL_P(tmp)) { + return rb_ary_join(ary, tmp); + } + + len = NUM2LONG(times); + if (len == 0) return ary_new(rb_obj_class(ary), 0); + if (len < 0) { + rb_raise(rb_eArgError, "negative argument"); + } + if (LONG_MAX/len < RARRAY(ary)->len) { + rb_raise(rb_eArgError, "argument too big"); + } + len *= RARRAY(ary)->len; + + ary2 = ary_new(rb_obj_class(ary), len); + RARRAY(ary2)->len = len; + + for (i=0; i<len; i+=RARRAY(ary)->len) { + MEMCPY(RARRAY(ary2)->ptr+i, RARRAY(ary)->ptr, VALUE, RARRAY(ary)->len); + } + OBJ_INFECT(ary2, ary); + + return ary2; +} + +/* + * call-seq: + * array.assoc(obj) -> an_array or nil + * + * Searches through an array whose elements are also arrays + * comparing _obj_ with the first element of each contained array + * using obj.==. + * Returns the first contained array that matches (that + * is, the first associated array), + * or +nil+ if no match is found. + * See also <code>Array#rassoc</code>. + * + * s1 = [ "colors", "red", "blue", "green" ] + * s2 = [ "letters", "a", "b", "c" ] + * s3 = "foo" + * a = [ s1, s2, s3 ] + * a.assoc("letters") #=> [ "letters", "a", "b", "c" ] + * a.assoc("foo") #=> nil + */ + +VALUE +rb_ary_assoc(ary, key) + VALUE ary, key; +{ + long i; + VALUE v; + + for (i = 0; i < RARRAY(ary)->len; ++i) { + v = RARRAY(ary)->ptr[i]; + if (TYPE(v) == T_ARRAY && + RARRAY(v)->len > 0 && + rb_equal(RARRAY(v)->ptr[0], key)) + return v; + } + return Qnil; +} + +/* + * call-seq: + * array.rassoc(key) -> an_array or nil + * + * Searches through the array whose elements are also arrays. Compares + * <em>key</em> with the second element of each contained array using + * <code>==</code>. Returns the first contained array that matches. See + * also <code>Array#assoc</code>. 
+ * + * a = [ [ 1, "one"], [2, "two"], [3, "three"], ["ii", "two"] ] + * a.rassoc("two") #=> [2, "two"] + * a.rassoc("four") #=> nil + */ + +VALUE +rb_ary_rassoc(ary, value) + VALUE ary, value; +{ + long i; + VALUE v; + + for (i = 0; i < RARRAY(ary)->len; ++i) { + v = RARRAY(ary)->ptr[i]; + if (TYPE(v) == T_ARRAY && + RARRAY(v)->len > 1 && + rb_equal(RARRAY(v)->ptr[1], value)) + return v; + } + return Qnil; +} + +/* + * call-seq: + * array == other_array -> bool + * + * Equality---Two arrays are equal if they contain the same number + * of elements and if each element is equal to (according to + * Object.==) the corresponding element in the other array. + * + * [ "a", "c" ] == [ "a", "c", 7 ] #=> false + * [ "a", "c", 7 ] == [ "a", "c", 7 ] #=> true + * [ "a", "c", 7 ] == [ "a", "d", "f" ] #=> false + * + */ + +static VALUE +rb_ary_equal(ary1, ary2) + VALUE ary1, ary2; +{ + long i; + + if (ary1 == ary2) return Qtrue; + if (TYPE(ary2) != T_ARRAY) { + if (!rb_respond_to(ary2, rb_intern("to_ary"))) { + return Qfalse; + } + return rb_equal(ary2, ary1); + } + if (RARRAY(ary1)->len != RARRAY(ary2)->len) return Qfalse; + for (i=0; i<RARRAY(ary1)->len; i++) { + if (!rb_equal(rb_ary_elt(ary1, i), rb_ary_elt(ary2, i))) + return Qfalse; + } + return Qtrue; +} + +/* + * call-seq: + * array.eql?(other) -> true or false + * + * Returns <code>true</code> if _array_ and _other_ are the same object, + * or are both arrays with the same content. 
+ */ + +static VALUE +rb_ary_eql(ary1, ary2) + VALUE ary1, ary2; +{ + long i; + + if (ary1 == ary2) return Qtrue; + if (TYPE(ary2) != T_ARRAY) return Qfalse; + if (RARRAY(ary1)->len != RARRAY(ary2)->len) return Qfalse; + for (i=0; i<RARRAY(ary1)->len; i++) { + if (!rb_eql(rb_ary_elt(ary1, i), rb_ary_elt(ary2, i))) + return Qfalse; + } + return Qtrue; +} + +static VALUE +recursive_hash(ary, dummy, recur) + VALUE ary, dummy; + int recur; +{ + long i, h; + VALUE n; + + if (recur) { + return LONG2FIX(0); + } + h = RARRAY(ary)->len; + for (i=0; i<RARRAY(ary)->len; i++) { + h = (h << 1) | (h<0 ? 1 : 0); + n = rb_hash(RARRAY(ary)->ptr[i]); + h ^= NUM2LONG(n); + } + return LONG2FIX(h); +} + +/* + * call-seq: + * array.hash -> fixnum + * + * Compute a hash-code for this array. Two arrays with the same content + * will have the same hash code (and will compare using <code>eql?</code>). + */ + +static VALUE +rb_ary_hash(ary) + VALUE ary; +{ + return rb_exec_recursive(recursive_hash, ary, 0); +} + +/* + * call-seq: + * array.include?(obj) -> true or false + * + * Returns <code>true</code> if the given object is present in + * <i>self</i> (that is, if any object <code>==</code> <i>anObject</i>), + * <code>false</code> otherwise. + * + * a = [ "a", "b", "c" ] + * a.include?("b") #=> true + * a.include?("z") #=> false + */ + +VALUE +rb_ary_includes(ary, item) + VALUE ary; + VALUE item; +{ + long i; + + for (i=0; i<RARRAY(ary)->len; i++) { + if (rb_equal(RARRAY(ary)->ptr[i], item)) { + return Qtrue; + } + } + return Qfalse; +} + + +/* + * call-seq: + * array <=> other_array -> -1, 0, +1 + * + * Comparison---Returns an integer (-1, 0, + * or +1) if this array is less than, equal to, or greater than + * other_array. Each object in each array is compared + * (using <=>). If any value isn't + * equal, then that inequality is the return value. If all the + * values found are equal, then the return is based on a + * comparison of the array lengths. 
Thus, two arrays are + * ``equal'' according to <code>Array#<=></code> if and only if they have + * the same length and the value of each element is equal to the + * value of the corresponding element in the other array. + * + * [ "a", "a", "c" ] <=> [ "a", "b", "c" ] #=> -1 + * [ 1, 2, 3, 4, 5, 6 ] <=> [ 1, 2 ] #=> +1 + * + */ + +VALUE +rb_ary_cmp(ary1, ary2) + VALUE ary1, ary2; +{ + long i, len; + + ary2 = to_ary(ary2); + len = RARRAY(ary1)->len; + if (len > RARRAY(ary2)->len) { + len = RARRAY(ary2)->len; + } + for (i=0; i<len; i++) { + VALUE v = rb_funcall(rb_ary_elt(ary1, i), id_cmp, 1, rb_ary_elt(ary2, i)); + if (v != INT2FIX(0)) { + return v; + } + } + len = RARRAY(ary1)->len - RARRAY(ary2)->len; + if (len == 0) return INT2FIX(0); + if (len > 0) return INT2FIX(1); + return INT2FIX(-1); +} + +static VALUE +ary_make_hash(ary1, ary2) + VALUE ary1, ary2; +{ + VALUE hash = rb_hash_new(); + long i; + + for (i=0; i<RARRAY(ary1)->len; i++) { + rb_hash_aset(hash, RARRAY(ary1)->ptr[i], Qtrue); + } + if (ary2) { + for (i=0; i<RARRAY(ary2)->len; i++) { + rb_hash_aset(hash, RARRAY(ary2)->ptr[i], Qtrue); + } + } + return hash; +} + +/* + * call-seq: + * array - other_array -> an_array + * + * Array Difference---Returns a new array that is a copy of + * the original array, removing any items that also appear in + * other_array. (If you need set-like behavior, see the + * library class Set.) + * + * [ 1, 1, 2, 2, 3, 3, 4, 5 ] - [ 1, 2, 4 ] #=> [ 3, 3, 5 ] + */ + +static VALUE +rb_ary_diff(ary1, ary2) + VALUE ary1, ary2; +{ + VALUE ary3, hash; + long i; + + hash = ary_make_hash(to_ary(ary2), 0); + ary3 = rb_ary_new(); + + for (i=0; i<RARRAY(ary1)->len; i++) { + if (st_lookup(RHASH(hash)->tbl, RARRAY(ary1)->ptr[i], 0)) continue; + rb_ary_push(ary3, rb_ary_elt(ary1, i)); + } + return ary3; +} + +/* + * call-seq: + * array & other_array + * + * Set Intersection---Returns a new array + * containing elements common to the two arrays, with no duplicates. 
+ * + * [ 1, 1, 3, 5 ] & [ 1, 2, 3 ] #=> [ 1, 3 ] + */ + + +static VALUE +rb_ary_and(ary1, ary2) + VALUE ary1, ary2; +{ + VALUE hash, ary3, v, vv; + long i; + + ary2 = to_ary(ary2); + ary3 = rb_ary_new2(RARRAY(ary1)->len < RARRAY(ary2)->len ? + RARRAY(ary1)->len : RARRAY(ary2)->len); + hash = ary_make_hash(ary2, 0); + + for (i=0; i<RARRAY(ary1)->len; i++) { + v = vv = rb_ary_elt(ary1, i); + if (st_delete(RHASH(hash)->tbl, (st_data_t*)&vv, 0)) { + rb_ary_push(ary3, v); + } + } + + return ary3; +} + +/* + * call-seq: + * array | other_array -> an_array + * + * Set Union---Returns a new array by joining this array with + * other_array, removing duplicates. + * + * [ "a", "b", "c" ] | [ "c", "d", "a" ] + * #=> [ "a", "b", "c", "d" ] + */ + +static VALUE +rb_ary_or(ary1, ary2) + VALUE ary1, ary2; +{ + VALUE hash, ary3; + VALUE v, vv; + long i; + + ary2 = to_ary(ary2); + ary3 = rb_ary_new2(RARRAY(ary1)->len+RARRAY(ary2)->len); + hash = ary_make_hash(ary1, ary2); + + for (i=0; i<RARRAY(ary1)->len; i++) { + v = vv = rb_ary_elt(ary1, i); + if (st_delete(RHASH(hash)->tbl, (st_data_t*)&vv, 0)) { + rb_ary_push(ary3, v); + } + } + for (i=0; i<RARRAY(ary2)->len; i++) { + v = vv = rb_ary_elt(ary2, i); + if (st_delete(RHASH(hash)->tbl, (st_data_t*)&vv, 0)) { + rb_ary_push(ary3, v); + } + } + return ary3; +} + +/* + * call-seq: + * array.uniq! -> array or nil + * + * Removes duplicate elements from _self_. + * Returns <code>nil</code> if no changes are made (that is, no + * duplicates are found). + * + * a = [ "a", "a", "b", "b", "c" ] + * a.uniq! #=> ["a", "b", "c"] + * b = [ "a", "b", "c" ] + * b.uniq! 
#=> nil + */ + +static VALUE +rb_ary_uniq_bang(ary) + VALUE ary; +{ + VALUE hash, v, vv; + long i, j; + + hash = ary_make_hash(ary, 0); + + if (RARRAY(ary)->len == RHASH(hash)->tbl->num_entries) { + return Qnil; + } + for (i=j=0; i<RARRAY(ary)->len; i++) { + v = vv = rb_ary_elt(ary, i); + if (st_delete(RHASH(hash)->tbl, (st_data_t*)&vv, 0)) { + rb_ary_store(ary, j++, v); + } + } + RARRAY(ary)->len = j; + + return ary; +} + +/* + * call-seq: + * array.uniq -> an_array + * + * Returns a new array by removing duplicate values in <i>self</i>. + * + * a = [ "a", "a", "b", "b", "c" ] + * a.uniq #=> ["a", "b", "c"] + */ + +static VALUE +rb_ary_uniq(ary) + VALUE ary; +{ + ary = rb_ary_dup(ary); + rb_ary_uniq_bang(ary); + return ary; +} + +/* + * call-seq: + * array.compact! -> array or nil + * + * Removes +nil+ elements from array. + * Returns +nil+ if no changes were made. + * + * [ "a", nil, "b", nil, "c" ].compact! #=> [ "a", "b", "c" ] + * [ "a", "b", "c" ].compact! #=> nil + */ + +static VALUE +rb_ary_compact_bang(ary) + VALUE ary; +{ + VALUE *p, *t, *end; + + rb_ary_modify(ary); + p = t = RARRAY(ary)->ptr; + end = p + RARRAY(ary)->len; + + while (t < end) { + if (NIL_P(*t)) t++; + else *p++ = *t++; + } + if (RARRAY(ary)->len == (p - RARRAY(ary)->ptr)) { + return Qnil; + } + RARRAY(ary)->len = RARRAY(ary)->aux.capa = (p - RARRAY(ary)->ptr); + REALLOC_N(RARRAY(ary)->ptr, VALUE, RARRAY(ary)->len); + + return ary; +} + +/* + * call-seq: + * array.compact -> an_array + * + * Returns a copy of _self_ with all +nil+ elements removed. + * + * [ "a", nil, "b", nil, "c", nil ].compact + * #=> [ "a", "b", "c" ] + */ + +static VALUE +rb_ary_compact(ary) + VALUE ary; +{ + ary = rb_ary_dup(ary); + rb_ary_compact_bang(ary); + return ary; +} + +/* + * call-seq: + * array.nitems -> int + * + * Returns the number of non-<code>nil</code> elements in _self_. + * May be zero. 
+ * + * [ 1, nil, 3, nil, 5 ].nitems #=> 3 + */ + +static VALUE +rb_ary_nitems(ary) + VALUE ary; +{ + long n = 0; + VALUE *p, *pend; + + p = RARRAY(ary)->ptr; + pend = p + RARRAY(ary)->len; + + while (p < pend) { + if (!NIL_P(*p)) n++; + p++; + } + return LONG2NUM(n); +} + +static long +flatten(ary, idx, ary2, memo) + VALUE ary; + long idx; + VALUE ary2, memo; +{ + VALUE id; + long i = idx; + long n, lim = idx + RARRAY(ary2)->len; + + id = rb_obj_id(ary2); + if (rb_ary_includes(memo, id)) { + rb_raise(rb_eArgError, "tried to flatten recursive array"); + } + rb_ary_push(memo, id); + rb_ary_splice(ary, idx, 1, ary2); + while (i < lim) { + VALUE tmp; + + tmp = rb_check_array_type(rb_ary_elt(ary, i)); + if (!NIL_P(tmp)) { + n = flatten(ary, i, tmp, memo); + i += n; lim += n; + } + i++; + } + rb_ary_pop(memo); + + return lim - idx - 1; /* returns number of increased items */ +} + +/* + * call-seq: + * array.flatten! -> array or nil + * + * Flattens _self_ in place. + * Returns <code>nil</code> if no modifications were made (i.e., + * <i>array</i> contains no subarrays.) + * + * a = [ 1, 2, [3, [4, 5] ] ] + * a.flatten! #=> [1, 2, 3, 4, 5] + * a.flatten! #=> nil + * a #=> [1, 2, 3, 4, 5] + */ + +static VALUE +rb_ary_flatten_bang(ary) + VALUE ary; +{ + long i = 0; + int mod = 0; + VALUE memo = Qnil; + + while (i<RARRAY(ary)->len) { + VALUE ary2 = RARRAY(ary)->ptr[i]; + VALUE tmp; + + tmp = rb_check_array_type(ary2); + if (!NIL_P(tmp)) { + if (NIL_P(memo)) { + memo = rb_ary_new(); + } + i += flatten(ary, i, tmp, memo); + mod = 1; + } + i++; + } + if (mod == 0) return Qnil; + return ary; +} + +/* + * call-seq: + * array.flatten -> an_array + * + * Returns a new array that is a one-dimensional flattening of this + * array (recursively). That is, for every element that is an array, + * extract its elements into the new array. 
+ * + * s = [ 1, 2, 3 ] #=> [1, 2, 3] + * t = [ 4, 5, 6, [7, 8] ] #=> [4, 5, 6, [7, 8]] + * a = [ s, t, 9, 10 ] #=> [[1, 2, 3], [4, 5, 6, [7, 8]], 9, 10] + * a.flatten #=> [1, 2, 3, 4, 5, 6, 7, 8, 9, 10 + */ + +static VALUE +rb_ary_flatten(ary) + VALUE ary; +{ + ary = rb_ary_dup(ary); + rb_ary_flatten_bang(ary); + return ary; +} + + +/* Arrays are ordered, integer-indexed collections of any object. + * Array indexing starts at 0, as in C or Java. A negative index is + * assumed to be relative to the end of the array---that is, an index of -1 + * indicates the last element of the array, -2 is the next to last + * element in the array, and so on. + */ + +void +Init_Array() +{ + rb_cArray = rb_define_class("Array", rb_cObject); + rb_include_module(rb_cArray, rb_mEnumerable); + + rb_define_alloc_func(rb_cArray, ary_alloc); + rb_define_singleton_method(rb_cArray, "[]", rb_ary_s_create, -1); + rb_define_method(rb_cArray, "initialize", rb_ary_initialize, -1); + rb_define_method(rb_cArray, "initialize_copy", rb_ary_replace, 1); + + rb_define_method(rb_cArray, "to_s", rb_ary_to_s, 0); + rb_define_method(rb_cArray, "inspect", rb_ary_inspect, 0); + rb_define_method(rb_cArray, "to_a", rb_ary_to_a, 0); + rb_define_method(rb_cArray, "to_ary", rb_ary_to_ary_m, 0); + rb_define_method(rb_cArray, "frozen?", rb_ary_frozen_p, 0); + + rb_define_method(rb_cArray, "==", rb_ary_equal, 1); + rb_define_method(rb_cArray, "eql?", rb_ary_eql, 1); + rb_define_method(rb_cArray, "hash", rb_ary_hash, 0); + + rb_define_method(rb_cArray, "[]", rb_ary_aref, -1); + rb_define_method(rb_cArray, "[]=", rb_ary_aset, -1); + rb_define_method(rb_cArray, "at", rb_ary_at, 1); + rb_define_method(rb_cArray, "fetch", rb_ary_fetch, -1); + rb_define_method(rb_cArray, "first", rb_ary_first, -1); + rb_define_method(rb_cArray, "last", rb_ary_last, -1); + rb_define_method(rb_cArray, "concat", rb_ary_concat, 1); + rb_define_method(rb_cArray, "<<", rb_ary_push, 1); + rb_define_method(rb_cArray, "push", rb_ary_push_m, 
-1); + rb_define_method(rb_cArray, "pop", rb_ary_pop_m, -1); + rb_define_method(rb_cArray, "shift", rb_ary_shift_m, -1); + rb_define_method(rb_cArray, "unshift", rb_ary_unshift_m, -1); + rb_define_method(rb_cArray, "insert", rb_ary_insert, -1); + rb_define_method(rb_cArray, "each", rb_ary_each, 0); + rb_define_method(rb_cArray, "each_index", rb_ary_each_index, 0); + rb_define_method(rb_cArray, "reverse_each", rb_ary_reverse_each, 0); + rb_define_method(rb_cArray, "length", rb_ary_length, 0); + rb_define_alias(rb_cArray, "size", "length"); + rb_define_method(rb_cArray, "empty?", rb_ary_empty_p, 0); + rb_define_method(rb_cArray, "index", rb_ary_index, -1); + rb_define_method(rb_cArray, "rindex", rb_ary_rindex, -1); + rb_define_method(rb_cArray, "join", rb_ary_join_m, -1); + rb_define_method(rb_cArray, "reverse", rb_ary_reverse_m, 0); + rb_define_method(rb_cArray, "reverse!", rb_ary_reverse_bang, 0); + rb_define_method(rb_cArray, "sort", rb_ary_sort, 0); + rb_define_method(rb_cArray, "sort!", rb_ary_sort_bang, 0); + rb_define_method(rb_cArray, "collect", rb_ary_collect, 0); + rb_define_method(rb_cArray, "collect!", rb_ary_collect_bang, 0); + rb_define_method(rb_cArray, "map", rb_ary_collect, 0); + rb_define_method(rb_cArray, "map!", rb_ary_collect_bang, 0); + rb_define_method(rb_cArray, "select", rb_ary_select, 0); + rb_define_method(rb_cArray, "values_at", rb_ary_values_at, -1); + rb_define_method(rb_cArray, "delete", rb_ary_delete, 1); + rb_define_method(rb_cArray, "delete_at", rb_ary_delete_at_m, 1); + rb_define_method(rb_cArray, "delete_if", rb_ary_delete_if, 0); + rb_define_method(rb_cArray, "reject", rb_ary_reject, 0); + rb_define_method(rb_cArray, "reject!", rb_ary_reject_bang, 0); + rb_define_method(rb_cArray, "zip", rb_ary_zip, -1); + rb_define_method(rb_cArray, "transpose", rb_ary_transpose, 0); + rb_define_method(rb_cArray, "replace", rb_ary_replace, 1); + rb_define_method(rb_cArray, "clear", rb_ary_clear, 0); + rb_define_method(rb_cArray, "fill", 
rb_ary_fill, -1); + rb_define_method(rb_cArray, "include?", rb_ary_includes, 1); + rb_define_method(rb_cArray, "<=>", rb_ary_cmp, 1); + + rb_define_method(rb_cArray, "slice", rb_ary_aref, -1); + rb_define_method(rb_cArray, "slice!", rb_ary_slice_bang, -1); + + rb_define_method(rb_cArray, "assoc", rb_ary_assoc, 1); + rb_define_method(rb_cArray, "rassoc", rb_ary_rassoc, 1); + + rb_define_method(rb_cArray, "+", rb_ary_plus, 1); + rb_define_method(rb_cArray, "*", rb_ary_times, 1); + + rb_define_method(rb_cArray, "-", rb_ary_diff, 1); + rb_define_method(rb_cArray, "&", rb_ary_and, 1); + rb_define_method(rb_cArray, "|", rb_ary_or, 1); + + rb_define_method(rb_cArray, "uniq", rb_ary_uniq, 0); + rb_define_method(rb_cArray, "uniq!", rb_ary_uniq_bang, 0); + rb_define_method(rb_cArray, "compact", rb_ary_compact, 0); + rb_define_method(rb_cArray, "compact!", rb_ary_compact_bang, 0); + rb_define_method(rb_cArray, "flatten", rb_ary_flatten, 0); + rb_define_method(rb_cArray, "flatten!", rb_ary_flatten_bang, 0); + rb_define_method(rb_cArray, "nitems", rb_ary_nitems, 0); + + id_cmp = rb_intern("<=>"); + + rb_cValues = rb_define_class("Values", rb_cArray); +} +/********************************************************************** + ascii.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2004 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +static int +ascii_is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 128) + return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); + else + return FALSE; +} + +OnigEncodingType OnigEncodingASCII = { + onigenc_single_byte_mbc_enc_len, + "US-ASCII", /* name */ + 1, /* max byte length */ + 1, /* min byte length */ + ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE, + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' 
*/ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + }, + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + onigenc_ascii_mbc_to_normalize, + onigenc_ascii_is_mbc_ambiguous, + onigenc_ascii_get_all_pair_ambig_codes, + onigenc_nothing_get_all_comp_ambig_codes, + ascii_is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; +/********************************************************************** + + bignum.c - + + $Author: matz $ + $Date: 2005/03/04 06:47:45 $ + created at: Fri Jun 10 00:48:55 JST 1994 + + Copyright (C) 1993-2003 Yukihiro Matsumoto + +**********************************************************************/ + +#include "ruby.h" + +#include <math.h> +#include <ctype.h> +#ifdef HAVE_IEEEFP_H +#include <ieeefp.h> +#endif + +VALUE rb_cBignum; + +#if defined __MINGW32__ +#define USHORT _USHORT +#endif + +#define BDIGITS(x) ((BDIGIT*)RBIGNUM(x)->digits) +#define BITSPERDIG (SIZEOF_BDIGITS*CHAR_BIT) +#define BIGRAD ((BDIGIT_DBL)1 << BITSPERDIG) +#define DIGSPERLONG ((unsigned int)(SIZEOF_LONG/SIZEOF_BDIGITS)) +#if HAVE_LONG_LONG +# define DIGSPERLL ((unsigned int)(SIZEOF_LONG_LONG/SIZEOF_BDIGITS)) +#endif +#define BIGUP(x) ((BDIGIT_DBL)(x) << BITSPERDIG) +#define BIGDN(x) RSHIFT(x,BITSPERDIG) +#define BIGLO(x) ((BDIGIT)((x) & (BIGRAD-1))) +#define BDIGMAX ((BDIGIT)-1) + +#define BIGZEROP(x) (RBIGNUM(x)->len == 0 || (RBIGNUM(x)->len == 1 && BDIGITS(x)[0] == 0)) + +static VALUE +bignew_1(klass, len, sign) + VALUE klass; + long len; + char sign; +{ + NEWOBJ(big, struct RBignum); + OBJSETUP(big, klass, T_BIGNUM); + big->sign = sign; + big->len = len; + big->digits = ALLOC_N(BDIGIT, len); + + return (VALUE)big; +} + +#define bignew(len,sign) bignew_1(rb_cBignum,len,sign) + +VALUE 
+rb_big_clone(x) + VALUE x; +{ + VALUE z = bignew_1(CLASS_OF(x), RBIGNUM(x)->len, RBIGNUM(x)->sign); + + MEMCPY(BDIGITS(z), BDIGITS(x), BDIGIT, RBIGNUM(x)->len); + return z; +} + +static void +get2comp(x, carry) /* get 2's complement */ + VALUE x; + int carry; +{ + long i = RBIGNUM(x)->len; + BDIGIT *ds = BDIGITS(x); + BDIGIT_DBL num; + + while (i--) ds[i] = ~ds[i]; + i = 0; num = 1; + do { + num += ds[i]; + ds[i++] = BIGLO(num); + num = BIGDN(num); + } while (i < RBIGNUM(x)->len); + if (!carry) return; + if ((ds[RBIGNUM(x)->len-1] & (1<<(BITSPERDIG-1))) == 0) { + REALLOC_N(RBIGNUM(x)->digits, BDIGIT, ++RBIGNUM(x)->len); + ds = BDIGITS(x); + ds[RBIGNUM(x)->len-1] = ~0; + } +} + +void +rb_big_2comp(x) /* get 2's complement */ + VALUE x; +{ + get2comp(x, Qtrue); +} + +static VALUE +bignorm(x) + VALUE x; +{ + if (!FIXNUM_P(x)) { + long len = RBIGNUM(x)->len; + BDIGIT *ds = BDIGITS(x); + + while (len-- && !ds[len]) ; + RBIGNUM(x)->len = ++len; + + if (len*SIZEOF_BDIGITS <= sizeof(VALUE)) { + long num = 0; + while (len--) { + num = BIGUP(num) + ds[len]; + } + if (num >= 0) { + if (RBIGNUM(x)->sign) { + if (POSFIXABLE(num)) return LONG2FIX(num); + } + else if (NEGFIXABLE(-(long)num)) return LONG2FIX(-(long)num); + } + } + } + return x; +} + +VALUE +rb_big_norm(x) + VALUE x; +{ + return bignorm(x); +} + +VALUE +rb_uint2big(n) + unsigned long n; +{ + BDIGIT_DBL num = n; + long i = 0; + BDIGIT *digits; + VALUE big; + + big = bignew(DIGSPERLONG, 1); + digits = BDIGITS(big); + while (i < DIGSPERLONG) { + digits[i++] = BIGLO(num); + num = BIGDN(num); + } + + i = DIGSPERLONG; + while (--i && !digits[i]) ; + RBIGNUM(big)->len = i+1; + return big; +} + +VALUE +rb_int2big(n) + long n; +{ + long neg = 0; + VALUE big; + + if (n < 0) { + n = -n; + neg = 1; + } + big = rb_uint2big(n); + if (neg) { + RBIGNUM(big)->sign = 0; + } + return big; +} + +VALUE +rb_uint2inum(n) + unsigned long n; +{ + if (POSFIXABLE(n)) return LONG2FIX(n); + return rb_uint2big(n); +} + +VALUE +rb_int2inum(n) + 
long n; +{ + if (FIXABLE(n)) return LONG2FIX(n); + return rb_int2big(n); +} + +#ifdef HAVE_LONG_LONG + +void +rb_quad_pack(buf, val) + char *buf; + VALUE val; +{ + LONG_LONG q; + + val = rb_to_int(val); + if (FIXNUM_P(val)) { + q = FIX2LONG(val); + } + else { + long len = RBIGNUM(val)->len; + BDIGIT *ds; + + if (len > SIZEOF_LONG_LONG/SIZEOF_BDIGITS) { + len = SIZEOF_LONG/SIZEOF_BDIGITS; + } + ds = BDIGITS(val); + q = 0; + while (len--) { + q = BIGUP(q); + q += ds[len]; + } + if (!RBIGNUM(val)->sign) q = -q; + } + memcpy(buf, (char*)&q, SIZEOF_LONG_LONG); +} + +VALUE +rb_quad_unpack(buf, sign) + const char *buf; + int sign; +{ + unsigned LONG_LONG q; + long neg = 0; + long i; + BDIGIT *digits; + VALUE big; + + memcpy(&q, buf, SIZEOF_LONG_LONG); + if (sign) { + if (FIXABLE((LONG_LONG)q)) return LONG2FIX((LONG_LONG)q); + if ((LONG_LONG)q < 0) { + q = -(LONG_LONG)q; + neg = 1; + } + } + else { + if (POSFIXABLE(q)) return LONG2FIX(q); + } + + i = 0; + big = bignew(DIGSPERLL, 1); + digits = BDIGITS(big); + while (i < DIGSPERLL) { + digits[i++] = BIGLO(q); + q = BIGDN(q); + } + + i = DIGSPERLL; + while (i-- && !digits[i]) ; + RBIGNUM(big)->len = i+1; + + if (neg) { + RBIGNUM(big)->sign = 0; + } + return bignorm(big); +} + +#else + +#define QUAD_SIZE 8 + +void +rb_quad_pack(buf, val) + char *buf; + VALUE val; +{ + long len; + + memset(buf, 0, QUAD_SIZE); + val = rb_to_int(val); + if (FIXNUM_P(val)) { + val = rb_int2big(FIX2LONG(val)); + } + len = RBIGNUM(val)->len * SIZEOF_BDIGITS; + if (len > QUAD_SIZE) { + rb_raise(rb_eRangeError, "bignum too big to convert into `quad int'"); + } + memcpy(buf, (char*)BDIGITS(val), len); + if (!RBIGNUM(val)->sign) { + len = QUAD_SIZE; + while (len--) { + *buf = ~*buf; + buf++; + } + } +} + +#define BNEG(b) (RSHIFT(((BDIGIT*)b)[QUAD_SIZE/SIZEOF_BDIGITS-1],BITSPERDIG-1) != 0) + +VALUE +rb_quad_unpack(buf, sign) + const char *buf; + int sign; +{ + VALUE big = bignew(QUAD_SIZE/SIZEOF_BDIGITS, 1); + + memcpy((char*)BDIGITS(big), buf, 
QUAD_SIZE); + if (sign && BNEG(buf)) { + long len = QUAD_SIZE; + char *tmp = (char*)BDIGITS(big); + + RBIGNUM(big)->sign = 0; + while (len--) { + *tmp = ~*tmp; + tmp++; + } + } + + return bignorm(big); +} + +#endif + +VALUE +rb_cstr_to_inum(str, base, badcheck) + const char *str; + int base; + int badcheck; +{ + const char *s = str; + char *end; + char sign = 1, nondigit = 0; + int c; + BDIGIT_DBL num; + long len, blen = 1; + long i; + VALUE z; + BDIGIT *zds; + + if (!str) { + if (badcheck) goto bad; + return INT2FIX(0); + } + if (badcheck) { + while (ISSPACE(*str)) str++; + } + else { + while (ISSPACE(*str) || *str == '_') str++; + } + + if (str[0] == '+') { + str++; + } + else if (str[0] == '-') { + str++; + sign = 0; + } + if (str[0] == '+' || str[0] == '-') { + if (badcheck) goto bad; + return INT2FIX(0); + } + if (base <= 0) { + if (str[0] == '0') { + switch (str[1]) { + case 'x': case 'X': + base = 16; + break; + case 'b': case 'B': + base = 2; + break; + case 'o': case 'O': + base = 8; + break; + case 'd': case 'D': + base = 10; + break; + default: + base = 8; + } + } + else if (base < -1) { + base = -base; + } + else { + base = 10; + } + } + switch (base) { + case 2: + len = 1; + if (str[0] == '0' && (str[1] == 'b'||str[1] == 'B')) { + str += 2; + } + break; + case 3: + len = 2; + break; + case 8: + if (str[0] == '0' && (str[1] == 'o'||str[1] == 'O')) { + str += 2; + } + case 4: case 5: case 6: case 7: + len = 3; + break; + case 10: + if (str[0] == '0' && (str[1] == 'd'||str[1] == 'D')) { + str += 2; + } + case 9: case 11: case 12: case 13: case 14: case 15: + len = 4; + break; + case 16: + len = 4; + if (str[0] == '0' && (str[1] == 'x'||str[1] == 'X')) { + str += 2; + } + break; + default: + if (base < 2 || 36 < base) { + rb_raise(rb_eArgError, "illegal radix %d", base); + } + if (base <= 32) { + len = 5; + } + else { + len = 6; + } + break; + } + if (*str == '0') { /* squeeze preceeding 0s */ + while (*++str == '0'); + --str; + } + len *= 
strlen(str)*sizeof(char); + + if (len <= (sizeof(VALUE)*CHAR_BIT)) { + unsigned long val = strtoul((char*)str, &end, base); + + if (*end == '_') goto bigparse; + if (badcheck) { + if (end == str) goto bad; /* no number */ + while (*end && ISSPACE(*end)) end++; + if (*end) goto bad; /* trailing garbage */ + } + + if (POSFIXABLE(val)) { + if (sign) return LONG2FIX(val); + else { + long result = -(long)val; + return LONG2FIX(result); + } + } + else { + VALUE big = rb_uint2big(val); + RBIGNUM(big)->sign = sign; + return bignorm(big); + } + } + bigparse: + len = (len/BITSPERDIG)+1; + if (badcheck && *str == '_') goto bad; + + z = bignew(len, sign); + zds = BDIGITS(z); + for (i=len;i--;) zds[i]=0; + while (c = *str++) { + if (c == '_') { + if (badcheck) { + if (nondigit) goto bad; + nondigit = c; + } + continue; + } + else if (!ISASCII(c)) { + break; + } + else if (isdigit(c)) { + c -= '0'; + } + else if (islower(c)) { + c -= 'a' - 10; + } + else if (isupper(c)) { + c -= 'A' - 10; + } + else { + break; + } + if (c >= base) break; + nondigit = 0; + i = 0; + num = c; + for (;;) { + while (i<blen) { + num += (BDIGIT_DBL)zds[i]*base; + zds[i++] = BIGLO(num); + num = BIGDN(num); + } + if (num) { + blen++; + continue; + } + break; + } + } + if (badcheck) { + str--; + if (s+1 < str && str[-1] == '_') goto bad; + while (*str && ISSPACE(*str)) str++; + if (*str) { + bad: + rb_invalid_str(s, "Integer"); + } + } + + return bignorm(z); +} + +VALUE +rb_str_to_inum(str, base, badcheck) + VALUE str; + int base; + int badcheck; +{ + char *s; + long len; + + StringValue(str); + if (badcheck) { + s = StringValueCStr(str); + } + else { + s = RSTRING(str)->ptr; + } + if (s) { + len = RSTRING(str)->len; + if (s[len]) { /* no sentinel somehow */ + char *p = ALLOCA_N(char, len+1); + + MEMCPY(p, s, char, len); + p[len] = '\0'; + s = p; + } + } + return rb_cstr_to_inum(s, base, badcheck); +} + +#if HAVE_LONG_LONG + +VALUE +rb_ull2big(n) + unsigned LONG_LONG n; +{ + BDIGIT_DBL num = n; + long i = 
0; + BDIGIT *digits; + VALUE big; + + big = bignew(DIGSPERLL, 1); + digits = BDIGITS(big); + while (i < DIGSPERLL) { + digits[i++] = BIGLO(num); + num = BIGDN(num); + } + + i = DIGSPERLL; + while (i-- && !digits[i]) ; + RBIGNUM(big)->len = i+1; + return big; +} + +VALUE +rb_ll2big(n) + LONG_LONG n; +{ + long neg = 0; + VALUE big; + + if (n < 0) { + n = -n; + neg = 1; + } + big = rb_ull2big(n); + if (neg) { + RBIGNUM(big)->sign = 0; + } + return big; +} + +VALUE +rb_ull2inum(n) + unsigned LONG_LONG n; +{ + if (POSFIXABLE(n)) return LONG2FIX(n); + return rb_ull2big(n); +} + +VALUE +rb_ll2inum(n) + LONG_LONG n; +{ + if (FIXABLE(n)) return LONG2FIX(n); + return rb_ll2big(n); +} + +#endif /* HAVE_LONG_LONG */ + +VALUE +rb_cstr2inum(str, base) + const char *str; + int base; +{ + return rb_cstr_to_inum(str, base, base==0); +} + +VALUE +rb_str2inum(str, base) + VALUE str; + int base; +{ + return rb_str_to_inum(str, base, base==0); +} + +const char ruby_digitmap[] = "0123456789abcdefghijklmnopqrstuvwxyz"; +VALUE +rb_big2str(x, base) + VALUE x; + int base; +{ + volatile VALUE t; + BDIGIT *ds; + long i, j, hbase; + VALUE ss; + char *s, c; + + if (FIXNUM_P(x)) { + return rb_fix2str(x, base); + } + i = RBIGNUM(x)->len; + if (BIGZEROP(x)) { + return rb_str_new2("0"); + } + j = SIZEOF_BDIGITS*CHAR_BIT*i; + switch (base) { + case 2: break; + case 3: + j = j * 647L / 1024; + break; + case 4: case 5: case 6: case 7: + j /= 2; + break; + case 8: case 9: + j /= 3; + break; + case 10: case 11: case 12: case 13: case 14: case 15: + j = j * 241L / 800; + break; + case 16: case 17: case 18: case 19: case 20: case 21: + case 22: case 23: case 24: case 25: case 26: case 27: + case 28: case 29: case 30: case 31: + j /= 4; + break; + case 32: case 33: case 34: case 35: case 36: + j /= 5; + break; + default: + rb_raise(rb_eArgError, "illegal radix %d", base); + break; + } + j += 2; + + hbase = base * base; +#if SIZEOF_BDIGITS > 2 + hbase *= hbase; +#endif + + t = rb_big_clone(x); + ds = 
BDIGITS(t); + ss = rb_str_new(0, j); + s = RSTRING(ss)->ptr; + + s[0] = RBIGNUM(x)->sign ? '+' : '-'; + while (i && j) { + long k = i; + BDIGIT_DBL num = 0; + + while (k--) { + num = BIGUP(num) + ds[k]; + ds[k] = (BDIGIT)(num / hbase); + num %= hbase; + } + if (ds[i-1] == 0) i--; + k = SIZEOF_BDIGITS; + while (k--) { + c = (char)(num % base); + s[--j] = ruby_digitmap[(int)c]; + num /= base; + if (i == 0 && num == 0) break; + } + } + while (s[j] == '0') j++; + RSTRING(ss)->len -= RBIGNUM(x)->sign?j:j-1; + memmove(RBIGNUM(x)->sign?s:s+1, s+j, RSTRING(ss)->len); + s[RSTRING(ss)->len] = '\0'; + + return ss; +} + +/* + * call-seq: + * big.to_s(base=10) => string + * + * Returns a string containing the representation of <i>big</i> radix + * <i>base</i> (2 through 36). + * + * 12345654321.to_s #=> "12345654321" + * 12345654321.to_s(2) #=> "1011011111110110111011110000110001" + * 12345654321.to_s(8) #=> "133766736061" + * 12345654321.to_s(16) #=> "2dfdbbc31" + * 78546939656932.to_s(36) #=> "rubyrules" + */ + +static VALUE +rb_big_to_s(argc, argv, x) + int argc; + VALUE *argv; + VALUE x; +{ + VALUE b; + int base; + + rb_scan_args(argc, argv, "01", &b); + if (argc == 0) base = 10; + else base = NUM2INT(b); + return rb_big2str(x, base); +} + +static unsigned long +big2ulong(x, type, check) + VALUE x; + char *type; + int check; +{ + long len = RBIGNUM(x)->len; + BDIGIT_DBL num; + BDIGIT *ds; + + if (len > SIZEOF_LONG/SIZEOF_BDIGITS) { + if (check) + rb_raise(rb_eRangeError, "bignum too big to convert into `%s'", type); + len = SIZEOF_LONG/SIZEOF_BDIGITS; + } + ds = BDIGITS(x); + num = 0; + while (len--) { + num = BIGUP(num); + num += ds[len]; + } + return num; +} + +unsigned long +rb_big2ulong_pack(x) + VALUE x; +{ + unsigned long num = big2ulong(x, "unsigned long", Qfalse); + if (!RBIGNUM(x)->sign) { + return -num; + } + return num; +} + +unsigned long +rb_big2ulong(x) + VALUE x; +{ + unsigned long num = big2ulong(x, "unsigned long", Qtrue); + + if (!RBIGNUM(x)->sign) { + if 
((long)num < 0) { + rb_raise(rb_eRangeError, "bignum out of range of unsigned long"); + } + return -num; + } + return num; +} + +long +rb_big2long(x) + VALUE x; +{ + unsigned long num = big2ulong(x, "long", Qtrue); + + if ((long)num < 0 && (RBIGNUM(x)->sign || (long)num != LONG_MIN)) { + rb_raise(rb_eRangeError, "bignum too big to convert into `long'"); + } + if (!RBIGNUM(x)->sign) return -(long)num; + return num; +} + +#if HAVE_LONG_LONG + +static unsigned LONG_LONG +big2ull(x, type) + VALUE x; + char *type; +{ + long len = RBIGNUM(x)->len; + BDIGIT_DBL num; + BDIGIT *ds; + + if (len > SIZEOF_LONG_LONG/SIZEOF_BDIGITS) + rb_raise(rb_eRangeError, "bignum too big to convert into `%s'", type); + ds = BDIGITS(x); + num = 0; + while (len--) { + num = BIGUP(num); + num += ds[len]; + } + return num; +} + +unsigned LONG_LONG +rb_big2ull(x) + VALUE x; +{ + unsigned LONG_LONG num = big2ull(x, "unsigned long long"); + + if (!RBIGNUM(x)->sign) return -num; + return num; +} + +LONG_LONG +rb_big2ll(x) + VALUE x; +{ + unsigned LONG_LONG num = big2ull(x, "long long"); + + if ((LONG_LONG)num < 0 && (RBIGNUM(x)->sign + || (LONG_LONG)num != LLONG_MIN)) { + rb_raise(rb_eRangeError, "bignum too big to convert into `long long'"); + } + if (!RBIGNUM(x)->sign) return -(LONG_LONG)num; + return num; +} + +#endif /* HAVE_LONG_LONG */ + +static VALUE +dbl2big(d) + double d; +{ + long i = 0; + BDIGIT c; + BDIGIT *digits; + VALUE z; + double u = (d < 0)?-d:d; + + if (isinf(d)) { + rb_raise(rb_eFloatDomainError, d < 0 ? 
"-Infinity" : "Infinity"); + } + if (isnan(d)) { + rb_raise(rb_eFloatDomainError, "NaN"); + } + + while (!POSFIXABLE(u) || 0 != (long)u) { + u /= (double)(BIGRAD); + i++; + } + z = bignew(i, d>=0); + digits = BDIGITS(z); + while (i--) { + u *= BIGRAD; + c = (BDIGIT)u; + u -= c; + digits[i] = c; + } + + return z; +} + +VALUE +rb_dbl2big(d) + double d; +{ + return bignorm(dbl2big(d)); +} + +double +rb_big2dbl(x) + VALUE x; +{ + double d = 0.0; + long i = RBIGNUM(x)->len; + BDIGIT *ds = BDIGITS(x); + + while (i--) { + d = ds[i] + BIGRAD*d; + } + if (isinf(d)) { + rb_warn("Bignum out of Float range"); + d = HUGE_VAL; + } + if (!RBIGNUM(x)->sign) d = -d; + return d; +} + +/* + * call-seq: + * big.to_f -> float + * + * Converts <i>big</i> to a <code>Float</code>. If <i>big</i> doesn't + * fit in a <code>Float</code>, the result is infinity. + * + */ + +static VALUE +rb_big_to_f(x) + VALUE x; +{ + return rb_float_new(rb_big2dbl(x)); +} + +/* + * call-seq: + * big <=> numeric => -1, 0, +1 + * + * Comparison---Returns -1, 0, or +1 depending on whether <i>big</i> is + * less than, equal to, or greater than <i>numeric</i>. This is the + * basis for the tests in <code>Comparable</code>. + * + */ + +static VALUE +rb_big_cmp(x, y) + VALUE x, y; +{ + long xlen = RBIGNUM(x)->len; + + switch (TYPE(y)) { + case T_FIXNUM: + y = rb_int2big(FIX2LONG(y)); + break; + + case T_BIGNUM: + break; + + case T_FLOAT: + return rb_dbl_cmp(rb_big2dbl(x), RFLOAT(y)->value); + + default: + return rb_num_coerce_cmp(x, y); + } + + if (RBIGNUM(x)->sign > RBIGNUM(y)->sign) return INT2FIX(1); + if (RBIGNUM(x)->sign < RBIGNUM(y)->sign) return INT2FIX(-1); + if (xlen < RBIGNUM(y)->len) + return (RBIGNUM(x)->sign) ? INT2FIX(-1) : INT2FIX(1); + if (xlen > RBIGNUM(y)->len) + return (RBIGNUM(x)->sign) ? INT2FIX(1) : INT2FIX(-1); + + while(xlen-- && (BDIGITS(x)[xlen]==BDIGITS(y)[xlen])); + if (-1 == xlen) return INT2FIX(0); + return (BDIGITS(x)[xlen] > BDIGITS(y)[xlen]) ? + (RBIGNUM(x)->sign ? 
INT2FIX(1) : INT2FIX(-1)) : + (RBIGNUM(x)->sign ? INT2FIX(-1) : INT2FIX(1)); +} + +/* + * call-seq: + * big == obj => true or false + * + * Returns <code>true</code> only if <i>obj</i> has the same value + * as <i>big</i>. Contrast this with <code>Bignum#eql?</code>, which + * requires <i>obj</i> to be a <code>Bignum</code>. + * + * 68719476736 == 68719476736.0 #=> true + */ + +static VALUE +rb_big_eq(x, y) + VALUE x, y; +{ + switch (TYPE(y)) { + case T_FIXNUM: + y = rb_int2big(FIX2LONG(y)); + break; + case T_BIGNUM: + break; + case T_FLOAT: + { + volatile double a, b; + + a = RFLOAT(y)->value; + b = rb_big2dbl(x); + if (isnan(a) || isnan(b)) return Qfalse; + return (a == b)?Qtrue:Qfalse; + } + default: + return rb_equal(y, x); + } + if (RBIGNUM(x)->sign != RBIGNUM(y)->sign) return Qfalse; + if (RBIGNUM(x)->len != RBIGNUM(y)->len) return Qfalse; + if (MEMCMP(BDIGITS(x),BDIGITS(y),BDIGIT,RBIGNUM(y)->len) != 0) return Qfalse; + return Qtrue; +} + +/* + * call-seq: + * big.eql?(obj) => true or false + * + * Returns <code>true</code> only if <i>obj</i> is a + * <code>Bignum</code> with the same value as <i>big</i>. Contrast this + * with <code>Bignum#==</code>, which performs type conversions. + * + * 68719476736.eql?(68719476736.0) #=> false + */ + +static VALUE +rb_big_eql(x, y) + VALUE x, y; +{ + if (TYPE(y) != T_BIGNUM) return Qfalse; + if (RBIGNUM(x)->sign != RBIGNUM(y)->sign) return Qfalse; + if (RBIGNUM(x)->len != RBIGNUM(y)->len) return Qfalse; + if (MEMCMP(BDIGITS(x),BDIGITS(y),BDIGIT,RBIGNUM(y)->len) != 0) return Qfalse; + return Qtrue; +} + +/* + * call-seq: + * -big => other_big + * + * Unary minus (returns a new Bignum whose value is 0-big) + */ + +static VALUE +rb_big_uminus(x) + VALUE x; +{ + VALUE z = rb_big_clone(x); + + RBIGNUM(z)->sign = !RBIGNUM(x)->sign; + + return bignorm(z); +} + +/* + * call-seq: + * ~big => integer + * + * Inverts the bits in big. 
As Bignums are conceptually infinite + * length, the result acts as if it had an infinite number of one + * bits to the left. In hex representations, this is displayed + * as two periods to the left of the digits. + * + * sprintf("%X", ~0x1122334455) #=> "..FEEDDCCBBAA" + */ + +static VALUE +rb_big_neg(x) + VALUE x; +{ + VALUE z = rb_big_clone(x); + long i = RBIGNUM(x)->len; + BDIGIT *ds = BDIGITS(z); + + if (!RBIGNUM(x)->sign) get2comp(z, Qtrue); + while (i--) ds[i] = ~ds[i]; + if (RBIGNUM(x)->sign) get2comp(z, Qfalse); + RBIGNUM(z)->sign = !RBIGNUM(z)->sign; + + return bignorm(z); +} + +static VALUE +bigsub(x, y) + VALUE x, y; +{ + VALUE z = 0; + BDIGIT *zds; + BDIGIT_DBL_SIGNED num; + long i = RBIGNUM(x)->len; + + /* if x is larger than y, swap */ + if (RBIGNUM(x)->len < RBIGNUM(y)->len) { + z = x; x = y; y = z; /* swap x y */ + } + else if (RBIGNUM(x)->len == RBIGNUM(y)->len) { + while (i > 0) { + i--; + if (BDIGITS(x)[i] > BDIGITS(y)[i]) { + break; + } + if (BDIGITS(x)[i] < BDIGITS(y)[i]) { + z = x; x = y; y = z; /* swap x y */ + break; + } + } + } + + z = bignew(RBIGNUM(x)->len, (z == 0)?1:0); + zds = BDIGITS(z); + + for (i = 0, num = 0; i < RBIGNUM(y)->len; i++) { + num += (BDIGIT_DBL_SIGNED)BDIGITS(x)[i] - BDIGITS(y)[i]; + zds[i] = BIGLO(num); + num = BIGDN(num); + } + while (num && i < RBIGNUM(x)->len) { + num += BDIGITS(x)[i]; + zds[i++] = BIGLO(num); + num = BIGDN(num); + } + while (i < RBIGNUM(x)->len) { + zds[i] = BDIGITS(x)[i]; + i++; + } + + return z; +} + +static VALUE +bigadd(x, y, sign) + VALUE x, y; + char sign; +{ + VALUE z; + BDIGIT_DBL num; + long i, len; + + sign = (sign == RBIGNUM(y)->sign); + if (RBIGNUM(x)->sign != sign) { + if (sign) return bigsub(y, x); + return bigsub(x, y); + } + + if (RBIGNUM(x)->len > RBIGNUM(y)->len) { + len = RBIGNUM(x)->len + 1; + z = x; x = y; y = z; + } + else { + len = RBIGNUM(y)->len + 1; + } + z = bignew(len, sign); + + len = RBIGNUM(x)->len; + for (i = 0, num = 0; i < len; i++) { + num += 
(BDIGIT_DBL)BDIGITS(x)[i] + BDIGITS(y)[i]; + BDIGITS(z)[i] = BIGLO(num); + num = BIGDN(num); + } + len = RBIGNUM(y)->len; + while (num && i < len) { + num += BDIGITS(y)[i]; + BDIGITS(z)[i++] = BIGLO(num); + num = BIGDN(num); + } + while (i < len) { + BDIGITS(z)[i] = BDIGITS(y)[i]; + i++; + } + BDIGITS(z)[i] = (BDIGIT)num; + + return z; +} + +/* + * call-seq: + * big + other => Numeric + * + * Adds big and other, returning the result. + */ + +VALUE +rb_big_plus(x, y) + VALUE x, y; +{ + switch (TYPE(y)) { + case T_FIXNUM: + y = rb_int2big(FIX2LONG(y)); + /* fall through */ + case T_BIGNUM: + return bignorm(bigadd(x, y, 1)); + + case T_FLOAT: + return rb_float_new(rb_big2dbl(x) + RFLOAT(y)->value); + + default: + return rb_num_coerce_bin(x, y); + } +} + +/* + * call-seq: + * big - other => Numeric + * + * Subtracts other from big, returning the result. + */ + +VALUE +rb_big_minus(x, y) + VALUE x, y; +{ + switch (TYPE(y)) { + case T_FIXNUM: + y = rb_int2big(FIX2LONG(y)); + /* fall through */ + case T_BIGNUM: + return bignorm(bigadd(x, y, 0)); + + case T_FLOAT: + return rb_float_new(rb_big2dbl(x) - RFLOAT(y)->value); + + default: + return rb_num_coerce_bin(x, y); + } +} + +/* + * call-seq: + * big * other => Numeric + * + * Multiplies big and other, returning the result. 
+ */ + +VALUE +rb_big_mul(x, y) + VALUE x, y; +{ + long i, j; + BDIGIT_DBL n = 0; + VALUE z; + BDIGIT *zds; + + if (FIXNUM_P(x)) x = rb_int2big(FIX2LONG(x)); + switch (TYPE(y)) { + case T_FIXNUM: + y = rb_int2big(FIX2LONG(y)); + break; + + case T_BIGNUM: + break; + + case T_FLOAT: + return rb_float_new(rb_big2dbl(x) * RFLOAT(y)->value); + + default: + return rb_num_coerce_bin(x, y); + } + + j = RBIGNUM(x)->len + RBIGNUM(y)->len + 1; + z = bignew(j, RBIGNUM(x)->sign==RBIGNUM(y)->sign); + zds = BDIGITS(z); + while (j--) zds[j] = 0; + for (i = 0; i < RBIGNUM(x)->len; i++) { + BDIGIT_DBL dd = BDIGITS(x)[i]; + if (dd == 0) continue; + n = 0; + for (j = 0; j < RBIGNUM(y)->len; j++) { + BDIGIT_DBL ee = n + (BDIGIT_DBL)dd * BDIGITS(y)[j]; + n = zds[i + j] + ee; + if (ee) zds[i + j] = BIGLO(n); + n = BIGDN(n); + } + if (n) { + zds[i + j] = n; + } + } + + return bignorm(z); +} + +static void +bigdivrem(x, y, divp, modp) + VALUE x, y; + VALUE *divp, *modp; +{ + long nx = RBIGNUM(x)->len, ny = RBIGNUM(y)->len; + long i, j; + VALUE yy, z; + BDIGIT *xds, *yds, *zds, *tds; + BDIGIT_DBL t2; + BDIGIT_DBL_SIGNED num; + BDIGIT dd, q; + + if (BIGZEROP(y)) rb_num_zerodiv(); + yds = BDIGITS(y); + if (nx < ny || (nx == ny && BDIGITS(x)[nx - 1] < BDIGITS(y)[ny - 1])) { + if (divp) *divp = rb_int2big(0); + if (modp) *modp = x; + return; + } + xds = BDIGITS(x); + if (ny == 1) { + dd = yds[0]; + z = rb_big_clone(x); + zds = BDIGITS(z); + t2 = 0; i = nx; + while (i--) { + t2 = BIGUP(t2) + zds[i]; + zds[i] = (BDIGIT)(t2 / dd); + t2 %= dd; + } + RBIGNUM(z)->sign = RBIGNUM(x)->sign==RBIGNUM(y)->sign; + if (modp) { + *modp = rb_uint2big((unsigned long)t2); + RBIGNUM(*modp)->sign = RBIGNUM(x)->sign; + } + if (divp) *divp = z; + return; + } + z = bignew(nx==ny?nx+2:nx+1, RBIGNUM(x)->sign==RBIGNUM(y)->sign); + zds = BDIGITS(z); + if (nx==ny) zds[nx+1] = 0; + while (!yds[ny-1]) ny--; + + dd = 0; + q = yds[ny-1]; + while ((q & (1<<(BITSPERDIG-1))) == 0) { + q <<= 1; + dd++; + } + if (dd) { + yy = 
rb_big_clone(y); + tds = BDIGITS(yy); + j = 0; + t2 = 0; + while (j<ny) { + t2 += (BDIGIT_DBL)yds[j]<<dd; + tds[j++] = BIGLO(t2); + t2 = BIGDN(t2); + } + yds = tds; + j = 0; + t2 = 0; + while (j<nx) { + t2 += (BDIGIT_DBL)xds[j]<<dd; + zds[j++] = BIGLO(t2); + t2 = BIGDN(t2); + } + zds[j] = (BDIGIT)t2; + } + else { + zds[nx] = 0; + j = nx; + while (j--) zds[j] = xds[j]; + } + + j = nx==ny?nx+1:nx; + do { + if (zds[j] == yds[ny-1]) q = BIGRAD-1; + else q = (BDIGIT)((BIGUP(zds[j]) + zds[j-1])/yds[ny-1]); + if (q) { + i = 0; num = 0; t2 = 0; + do { /* multiply and subtract */ + BDIGIT_DBL ee; + t2 += (BDIGIT_DBL)yds[i] * q; + ee = num - BIGLO(t2); + num = (BDIGIT_DBL)zds[j - ny + i] + ee; + if (ee) zds[j - ny + i] = BIGLO(num); + num = BIGDN(num); + t2 = BIGDN(t2); + } while (++i < ny); + num += zds[j - ny + i] - t2;/* borrow from high digit; don't update */ + while (num) { /* "add back" required */ + i = 0; num = 0; q--; + do { + BDIGIT_DBL ee = num + yds[i]; + num = (BDIGIT_DBL)zds[j - ny + i] + ee; + if (ee) zds[j - ny + i] = BIGLO(num); + num = BIGDN(num); + } while (++i < ny); + num--; + } + } + zds[j] = q; + } while (--j >= ny); + if (divp) { /* move quotient down in z */ + *divp = rb_big_clone(z); + zds = BDIGITS(*divp); + j = (nx==ny ? 
nx+2 : nx+1) - ny; + for (i = 0;i < j;i++) zds[i] = zds[i+ny]; + RBIGNUM(*divp)->len = i; + } + if (modp) { /* normalize remainder */ + *modp = rb_big_clone(z); + zds = BDIGITS(*modp); + while (--ny && !zds[ny]); ++ny; + if (dd) { + t2 = 0; i = ny; + while(i--) { + t2 = (t2 | zds[i]) >> dd; + q = zds[i]; + zds[i] = BIGLO(t2); + t2 = BIGUP(q); + } + } + RBIGNUM(*modp)->len = ny; + RBIGNUM(*modp)->sign = RBIGNUM(x)->sign; + } +} + +static void +bigdivmod(x, y, divp, modp) + VALUE x, y; + VALUE *divp, *modp; +{ + VALUE mod; + + bigdivrem(x, y, divp, &mod); + if (RBIGNUM(x)->sign != RBIGNUM(y)->sign && !BIGZEROP(mod)) { + if (divp) *divp = bigadd(*divp, rb_int2big(1), 0); + if (modp) *modp = bigadd(mod, y, 1); + } + else { + if (divp) *divp = *divp; + if (modp) *modp = mod; + } +} + +/* + * call-seq: + * big / other => Numeric + * big.div(other) => Numeric + * + * Divides big by other, returning the result. + */ + +static VALUE +rb_big_div(x, y) + VALUE x, y; +{ + VALUE z; + + switch (TYPE(y)) { + case T_FIXNUM: + y = rb_int2big(FIX2LONG(y)); + break; + + case T_BIGNUM: + break; + + case T_FLOAT: + return rb_float_new(rb_big2dbl(x) / RFLOAT(y)->value); + + default: + return rb_num_coerce_bin(x, y); + } + bigdivmod(x, y, &z, 0); + + return bignorm(z); +} + +/* + * call-seq: + * big % other => Numeric + * big.modulo(other) => Numeric + * + * Returns big modulo other. See Numeric.divmod for more + * information. + */ + +static VALUE +rb_big_modulo(x, y) + VALUE x, y; +{ + VALUE z; + + switch (TYPE(y)) { + case T_FIXNUM: + y = rb_int2big(FIX2LONG(y)); + break; + + case T_BIGNUM: + break; + + default: + return rb_num_coerce_bin(x, y); + } + bigdivmod(x, y, 0, &z); + + return bignorm(z); +} + +/* + * call-seq: + * big.remainder(numeric) => number + * + * Returns the remainder after dividing <i>big</i> by <i>numeric</i>. 
+ * + * -1234567890987654321.remainder(13731) #=> -6966 + * -1234567890987654321.remainder(13731.24) #=> -9906.22531493148 + */ +static VALUE +rb_big_remainder(x, y) + VALUE x, y; +{ + VALUE z; + + switch (TYPE(y)) { + case T_FIXNUM: + y = rb_int2big(FIX2LONG(y)); + break; + + case T_BIGNUM: + break; + + default: + return rb_num_coerce_bin(x, y); + } + bigdivrem(x, y, 0, &z); + + return bignorm(z); +} + +/* + * call-seq: + * big.divmod(numeric) => array + * + * See <code>Numeric#divmod</code>. + * + */ +VALUE +rb_big_divmod(x, y) + VALUE x, y; +{ + VALUE div, mod; + + switch (TYPE(y)) { + case T_FIXNUM: + y = rb_int2big(FIX2LONG(y)); + break; + + case T_BIGNUM: + break; + + default: + return rb_num_coerce_bin(x, y); + } + bigdivmod(x, y, &div, &mod); + + return rb_assoc_new(bignorm(div), bignorm(mod)); +} + +/* + * call-seq: + * big.quo(numeric) -> float + * + * Returns the floating point result of dividing <i>big</i> by + * <i>numeric</i>. + * + * -1234567890987654321.quo(13731) #=> -89910996357705.5 + * -1234567890987654321.quo(13731.24) #=> -89909424858035.7 + * + */ + +static VALUE +rb_big_quo(x, y) + VALUE x, y; +{ + double dx = rb_big2dbl(x); + double dy; + + switch (TYPE(y)) { + case T_FIXNUM: + dy = (double)FIX2LONG(y); + break; + + case T_BIGNUM: + dy = rb_big2dbl(y); + break; + + case T_FLOAT: + dy = RFLOAT(y)->value; + break; + + default: + return rb_num_coerce_bin(x, y); + } + return rb_float_new(dx / dy); +} + +/* + * call-seq: + * big ** exponent #=> numeric + * + * Raises _big_ to the _exponent_ power (which may be an integer, float, + * or anything that will coerce to a number). 
The result may be + * a Fixnum, Bignum, or Float + * + * 123456789 ** 2 #=> 15241578750190521 + * 123456789 ** 1.2 #=> 5126464716.09932 + * 123456789 ** -2 #=> 6.5610001194102e-17 + */ + +VALUE +rb_big_pow(x, y) + VALUE x, y; +{ + double d; + long yy; + + if (y == INT2FIX(0)) return INT2FIX(1); + switch (TYPE(y)) { + case T_FLOAT: + d = RFLOAT(y)->value; + break; + + case T_BIGNUM: + rb_warn("in a**b, b may be too big"); + d = rb_big2dbl(y); + break; + + case T_FIXNUM: + yy = FIX2LONG(y); + if (yy > 0) { + VALUE z = x; + + for (;;) { + yy -= 1; + if (yy == 0) break; + while (yy % 2 == 0) { + yy /= 2; + x = rb_big_mul(x, x); + } + z = rb_big_mul(z, x); + } + return bignorm(z); + } + d = (double)yy; + break; + + default: + return rb_num_coerce_bin(x, y); + } + return rb_float_new(pow(rb_big2dbl(x), d)); +} + +/* + * call-seq: + * big & numeric => integer + * + * Performs bitwise +and+ between _big_ and _numeric_. + */ + +VALUE +rb_big_and(xx, yy) + VALUE xx, yy; +{ + volatile VALUE x, y, z; + BDIGIT *ds1, *ds2, *zds; + long i, l1, l2; + char sign; + + x = xx; + y = rb_to_int(yy); + if (FIXNUM_P(y)) { + y = rb_int2big(FIX2LONG(y)); + } + if (!RBIGNUM(y)->sign) { + y = rb_big_clone(y); + get2comp(y, Qtrue); + } + if (!RBIGNUM(x)->sign) { + x = rb_big_clone(x); + get2comp(x, Qtrue); + } + if (RBIGNUM(x)->len > RBIGNUM(y)->len) { + l1 = RBIGNUM(y)->len; + l2 = RBIGNUM(x)->len; + ds1 = BDIGITS(y); + ds2 = BDIGITS(x); + sign = RBIGNUM(y)->sign; + } + else { + l1 = RBIGNUM(x)->len; + l2 = RBIGNUM(y)->len; + ds1 = BDIGITS(x); + ds2 = BDIGITS(y); + sign = RBIGNUM(x)->sign; + } + z = bignew(l2, RBIGNUM(x)->sign || RBIGNUM(y)->sign); + zds = BDIGITS(z); + + for (i=0; i<l1; i++) { + zds[i] = ds1[i] & ds2[i]; + } + for (; i<l2; i++) { + zds[i] = sign?0:ds2[i]; + } + if (!RBIGNUM(z)->sign) get2comp(z, Qfalse); + return bignorm(z); +} + +/* + * call-seq: + * big | numeric => integer + * + * Performs bitwise +or+ between _big_ and _numeric_. 
+ */ + +VALUE +rb_big_or(xx, yy) + VALUE xx, yy; +{ + volatile VALUE x, y, z; + BDIGIT *ds1, *ds2, *zds; + long i, l1, l2; + char sign; + + x = xx; + y = rb_to_int(yy); + if (FIXNUM_P(y)) { + y = rb_int2big(FIX2LONG(y)); + } + + if (!RBIGNUM(y)->sign) { + y = rb_big_clone(y); + get2comp(y, Qtrue); + } + if (!RBIGNUM(x)->sign) { + x = rb_big_clone(x); + get2comp(x, Qtrue); + } + if (RBIGNUM(x)->len > RBIGNUM(y)->len) { + l1 = RBIGNUM(y)->len; + l2 = RBIGNUM(x)->len; + ds1 = BDIGITS(y); + ds2 = BDIGITS(x); + sign = RBIGNUM(y)->sign; + } + else { + l1 = RBIGNUM(x)->len; + l2 = RBIGNUM(y)->len; + ds1 = BDIGITS(x); + ds2 = BDIGITS(y); + sign = RBIGNUM(x)->sign; + } + z = bignew(l2, RBIGNUM(x)->sign && RBIGNUM(y)->sign); + zds = BDIGITS(z); + + for (i=0; i<l1; i++) { + zds[i] = ds1[i] | ds2[i]; + } + for (; i<l2; i++) { + zds[i] = sign?ds2[i]:(BIGRAD-1); + } + if (!RBIGNUM(z)->sign) get2comp(z, Qfalse); + + return bignorm(z); +} + +/* + * call-seq: + * big ^ numeric => integer + * + * Performs bitwise +exclusive or+ between _big_ and _numeric_. 
+ */ + +VALUE +rb_big_xor(xx, yy) + VALUE xx, yy; +{ + volatile VALUE x, y; + VALUE z; + BDIGIT *ds1, *ds2, *zds; + long i, l1, l2; + char sign; + + x = xx; + y = rb_to_int(yy); + if (FIXNUM_P(y)) { + y = rb_int2big(FIX2LONG(y)); + } + + if (!RBIGNUM(y)->sign) { + y = rb_big_clone(y); + get2comp(y, Qtrue); + } + if (!RBIGNUM(x)->sign) { + x = rb_big_clone(x); + get2comp(x, Qtrue); + } + if (RBIGNUM(x)->len > RBIGNUM(y)->len) { + l1 = RBIGNUM(y)->len; + l2 = RBIGNUM(x)->len; + ds1 = BDIGITS(y); + ds2 = BDIGITS(x); + sign = RBIGNUM(y)->sign; + } + else { + l1 = RBIGNUM(x)->len; + l2 = RBIGNUM(y)->len; + ds1 = BDIGITS(x); + ds2 = BDIGITS(y); + sign = RBIGNUM(x)->sign; + } + RBIGNUM(x)->sign = RBIGNUM(x)->sign?1:0; + RBIGNUM(y)->sign = RBIGNUM(y)->sign?1:0; + z = bignew(l2, !(RBIGNUM(x)->sign ^ RBIGNUM(y)->sign)); + zds = BDIGITS(z); + + for (i=0; i<l1; i++) { + zds[i] = ds1[i] ^ ds2[i]; + } + for (; i<l2; i++) { + zds[i] = sign?ds2[i]:~ds2[i]; + } + if (!RBIGNUM(z)->sign) get2comp(z, Qfalse); + + return bignorm(z); +} + +static VALUE rb_big_rshift _((VALUE,VALUE)); + +/* + * call-seq: + * big << numeric => integer + * + * Shifts big left _numeric_ positions (right if _numeric_ is negative). + */ + +VALUE +rb_big_lshift(x, y) + VALUE x, y; +{ + BDIGIT *xds, *zds; + int shift = NUM2INT(y); + int s1 = shift/BITSPERDIG; + int s2 = shift%BITSPERDIG; + VALUE z; + BDIGIT_DBL num = 0; + long len, i; + + if (shift < 0) return rb_big_rshift(x, INT2FIX(-shift)); + len = RBIGNUM(x)->len; + z = bignew(len+s1+1, RBIGNUM(x)->sign); + zds = BDIGITS(z); + for (i=0; i<s1; i++) { + *zds++ = 0; + } + xds = BDIGITS(x); + for (i=0; i<len; i++) { + num = num | (BDIGIT_DBL)*xds++<<s2; + *zds++ = BIGLO(num); + num = BIGDN(num); + } + *zds = BIGLO(num); + return bignorm(z); +} + +/* + * call-seq: + * big >> numeric => integer + * + * Shifts big right _numeric_ positions (left if _numeric_ is negative). 
+ */ + +static VALUE +rb_big_rshift(x, y) + VALUE x, y; +{ + BDIGIT *xds, *zds; + int shift = NUM2INT(y); + long s1 = shift/BITSPERDIG; + long s2 = shift%BITSPERDIG; + VALUE z; + BDIGIT_DBL num = 0; + long i, j; + + if (shift < 0) return rb_big_lshift(x, INT2FIX(-shift)); + + if (s1 > RBIGNUM(x)->len) { + if (RBIGNUM(x)->sign) + return INT2FIX(0); + else + return INT2FIX(-1); + } + if (!RBIGNUM(x)->sign) { + x = rb_big_clone(x); + get2comp(x, Qtrue); + } + xds = BDIGITS(x); + i = RBIGNUM(x)->len; j = i - s1; + z = bignew(j, RBIGNUM(x)->sign); + if (!RBIGNUM(x)->sign) { + num = ((BDIGIT_DBL)~0) << BITSPERDIG; + } + zds = BDIGITS(z); + while (i--, j--) { + num = (num | xds[i]) >> s2; + zds[j] = BIGLO(num); + num = BIGUP(xds[i]); + } + if (!RBIGNUM(x)->sign) { + get2comp(z, Qfalse); + } + return bignorm(z); +} + +/* + * call-seq: + * big[n] -> 0, 1 + * + * Bit Reference---Returns the <em>n</em>th bit in the (assumed) binary + * representation of <i>big</i>, where <i>big</i>[0] is the least + * significant bit. + * + * a = 9**15 + * 50.downto(0) do |n| + * print a[n] + * end + * + * <em>produces:</em> + * + * 000101110110100000111000011110010100111100010111001 + * + */ + +static VALUE +rb_big_aref(x, y) + VALUE x, y; +{ + BDIGIT *xds; + int shift; + long s1, s2; + + if (TYPE(y) == T_BIGNUM) { + if (!RBIGNUM(y)->sign || RBIGNUM(x)->sign) + return INT2FIX(0); + return INT2FIX(1); + } + shift = NUM2INT(y); + if (shift < 0) return INT2FIX(0); + s1 = shift/BITSPERDIG; + s2 = shift%BITSPERDIG; + + if (!RBIGNUM(x)->sign) { + if (s1 >= RBIGNUM(x)->len) return INT2FIX(1); + x = rb_big_clone(x); + get2comp(x, Qtrue); + } + else { + if (s1 >= RBIGNUM(x)->len) return INT2FIX(0); + } + xds = BDIGITS(x); + if (xds[s1] & (1<<s2)) + return INT2FIX(1); + return INT2FIX(0); +} + +/* + * call-seq: + * big.hash => fixnum + * + * Compute a hash based on the value of _big_. 
+ */ + +static VALUE +rb_big_hash(x) + VALUE x; +{ + long i, len, key; + BDIGIT *digits; + + key = 0; digits = BDIGITS(x); len = RBIGNUM(x)->len; + for (i=0; i<len; i++) { + key ^= *digits++; + } + return LONG2FIX(key); +} + +/* + * MISSING: documentation + */ + +static VALUE +rb_big_coerce(x, y) + VALUE x, y; +{ + if (FIXNUM_P(y)) { + return rb_assoc_new(rb_int2big(FIX2LONG(y)), x); + } + else { + rb_raise(rb_eTypeError, "can't coerce %s to Bignum", + rb_obj_classname(y)); + } + /* not reached */ + return Qnil; +} + +/* + * call-seq: + * big.abs -> aBignum + * + * Returns the absolute value of <i>big</i>. + * + * -1234567890987654321.abs #=> 1234567890987654321 + */ + +static VALUE +rb_big_abs(x) + VALUE x; +{ + if (!RBIGNUM(x)->sign) { + x = rb_big_clone(x); + RBIGNUM(x)->sign = 1; + } + return x; +} + +VALUE +rb_big_rand(max, rand_buf) + VALUE max; + double *rand_buf; +{ + VALUE v; + long len = RBIGNUM(max)->len; + + if (BIGZEROP(max)) { + return rb_float_new(rand_buf[0]); + } + v = bignew(len,1); + len--; + BDIGITS(v)[len] = BDIGITS(max)[len] * rand_buf[len]; + while (len--) { + BDIGITS(v)[len] = ((BDIGIT)~0) * rand_buf[len]; + } + + return v; +} + +/* + * call-seq: + * big.size -> integer + * + * Returns the number of bytes in the machine representation of + * <i>big</i>. + * + * (256**10 - 1).size #=> 12 + * (256**20 - 1).size #=> 20 + * (256**40 - 1).size #=> 40 + */ + +static VALUE +rb_big_size(big) + VALUE big; +{ + return LONG2FIX(RBIGNUM(big)->len*SIZEOF_BDIGITS); +} + +/* + * Bignum objects hold integers outside the range of + * Fixnum. Bignum objects are created + * automatically when integer calculations would otherwise overflow a + * Fixnum. When a calculation involving + * Bignum objects returns a result that will fit in a + * Fixnum, the result is automatically converted. + * + * For the purposes of the bitwise operations and <code>[]</code>, a + * Bignum is treated as if it were an infinite-length + * bitstring with 2's complement representation. 
+ * + * While Fixnum values are immediate, Bignum + * objects are not---assignment and parameter passing work with + * references to objects, not the objects themselves. + * + */ + +void +Init_Bignum() +{ + rb_cBignum = rb_define_class("Bignum", rb_cInteger); + + rb_define_method(rb_cBignum, "to_s", rb_big_to_s, -1); + rb_define_method(rb_cBignum, "coerce", rb_big_coerce, 1); + rb_define_method(rb_cBignum, "-@", rb_big_uminus, 0); + rb_define_method(rb_cBignum, "+", rb_big_plus, 1); + rb_define_method(rb_cBignum, "-", rb_big_minus, 1); + rb_define_method(rb_cBignum, "*", rb_big_mul, 1); + rb_define_method(rb_cBignum, "/", rb_big_div, 1); + rb_define_method(rb_cBignum, "%", rb_big_modulo, 1); + rb_define_method(rb_cBignum, "div", rb_big_div, 1); + rb_define_method(rb_cBignum, "divmod", rb_big_divmod, 1); + rb_define_method(rb_cBignum, "modulo", rb_big_modulo, 1); + rb_define_method(rb_cBignum, "remainder", rb_big_remainder, 1); + rb_define_method(rb_cBignum, "quo", rb_big_quo, 1); + rb_define_method(rb_cBignum, "**", rb_big_pow, 1); + rb_define_method(rb_cBignum, "&", rb_big_and, 1); + rb_define_method(rb_cBignum, "|", rb_big_or, 1); + rb_define_method(rb_cBignum, "^", rb_big_xor, 1); + rb_define_method(rb_cBignum, "~", rb_big_neg, 0); + rb_define_method(rb_cBignum, "<<", rb_big_lshift, 1); + rb_define_method(rb_cBignum, ">>", rb_big_rshift, 1); + rb_define_method(rb_cBignum, "[]", rb_big_aref, 1); + + rb_define_method(rb_cBignum, "<=>", rb_big_cmp, 1); + rb_define_method(rb_cBignum, "==", rb_big_eq, 1); + rb_define_method(rb_cBignum, "eql?", rb_big_eql, 1); + rb_define_method(rb_cBignum, "hash", rb_big_hash, 0); + rb_define_method(rb_cBignum, "to_f", rb_big_to_f, 0); + rb_define_method(rb_cBignum, "abs", rb_big_abs, 0); + rb_define_method(rb_cBignum, "size", rb_big_size, 0); +} +/********************************************************************** + + class.c - + + $Author: matz $ + $Date: 2005/03/04 06:47:45 $ + created at: Tue Aug 10 15:05:44 JST 1993 + + 
Copyright (C) 1993-2003 Yukihiro Matsumoto + +**********************************************************************/ + +#include "ruby.h" +#include "rubysig.h" +#include "node.h" +#include "st.h" +#include <ctype.h> + +extern st_table *rb_class_tbl; + +VALUE +rb_class_boot(super) + VALUE super; +{ + NEWOBJ(klass, struct RClass); + OBJSETUP(klass, rb_cClass, T_CLASS); + + klass->super = super; + klass->iv_tbl = 0; + klass->m_tbl = 0; /* safe GC */ + klass->m_tbl = st_init_numtable(); + + OBJ_INFECT(klass, super); + return (VALUE)klass; +} + +void +rb_check_inheritable(super) + VALUE super; +{ + if (TYPE(super) != T_CLASS) { + rb_raise(rb_eTypeError, "superclass must be a Class (%s given)", + rb_obj_classname(super)); + } + if (RBASIC(super)->flags & FL_SINGLETON) { + rb_raise(rb_eTypeError, "can't make subclass of singleton class"); + } +} + +VALUE +rb_class_new(super) + VALUE super; +{ + Check_Type(super, T_CLASS); + rb_check_inheritable(super); + if (super == rb_cClass) { + rb_raise(rb_eTypeError, "can't make subclass of Class"); + } + return rb_class_boot(super); +} + +static int +clone_method(mid, body, tbl) + ID mid; + NODE *body; + st_table *tbl; +{ + st_insert(tbl, mid, (st_data_t)NEW_METHOD(body->nd_body, body->nd_noex)); + return ST_CONTINUE; +} + +/* :nodoc: */ +VALUE +rb_mod_init_copy(clone, orig) + VALUE clone, orig; +{ + rb_obj_init_copy(clone, orig); + if (!FL_TEST(CLASS_OF(clone), FL_SINGLETON)) { + RBASIC(clone)->klass = rb_singleton_class_clone(orig); + } + RCLASS(clone)->super = RCLASS(orig)->super; + if (RCLASS(orig)->iv_tbl) { + ID id; + + RCLASS(clone)->iv_tbl = st_copy(RCLASS(orig)->iv_tbl); + id = rb_intern("__classpath__"); + st_delete(RCLASS(clone)->iv_tbl, (st_data_t*)&id, 0); + id = rb_intern("__classid__"); + st_delete(RCLASS(clone)->iv_tbl, (st_data_t*)&id, 0); + } + if (RCLASS(orig)->m_tbl) { + RCLASS(clone)->m_tbl = st_init_numtable(); + st_foreach(RCLASS(orig)->m_tbl, clone_method, + (st_data_t)RCLASS(clone)->m_tbl); + } + + return 
clone; +} + +/* :nodoc: */ +VALUE +rb_class_init_copy(clone, orig) + VALUE clone, orig; +{ + if (RCLASS(clone)->super != 0) { + rb_raise(rb_eTypeError, "already initialized class"); + } + return rb_mod_init_copy(clone, orig); +} + +VALUE +rb_singleton_class_clone(obj) + VALUE obj; +{ + VALUE klass = RBASIC(obj)->klass; + + if (!FL_TEST(klass, FL_SINGLETON)) + return klass; + else { + /* copy singleton(unnamed) class */ + NEWOBJ(clone, struct RClass); + OBJSETUP(clone, 0, RBASIC(klass)->flags); + + if (BUILTIN_TYPE(obj) == T_CLASS) { + RBASIC(clone)->klass = (VALUE)clone; + } + else { + RBASIC(clone)->klass = rb_singleton_class_clone(klass); + } + + clone->super = RCLASS(klass)->super; + clone->iv_tbl = 0; + clone->m_tbl = 0; + if (RCLASS(klass)->iv_tbl) { + clone->iv_tbl = st_copy(RCLASS(klass)->iv_tbl); + } + clone->m_tbl = st_init_numtable(); + st_foreach(RCLASS(klass)->m_tbl, clone_method, + (st_data_t)clone->m_tbl); + rb_singleton_class_attached(RBASIC(clone)->klass, (VALUE)clone); + FL_SET(clone, FL_SINGLETON); + return (VALUE)clone; + } +} + +void +rb_singleton_class_attached(klass, obj) + VALUE klass, obj; +{ + if (FL_TEST(klass, FL_SINGLETON)) { + if (!RCLASS(klass)->iv_tbl) { + RCLASS(klass)->iv_tbl = st_init_numtable(); + } + st_insert(RCLASS(klass)->iv_tbl, rb_intern("__attached__"), obj); + } +} + +VALUE +rb_make_metaclass(obj, super) + VALUE obj, super; +{ + if (BUILTIN_TYPE(obj) == T_CLASS && FL_TEST(obj, FL_SINGLETON)) { + return RBASIC(obj)->klass = rb_cClass; + } + else { + VALUE metasuper; + VALUE klass = rb_class_boot(super); + + FL_SET(klass, FL_SINGLETON); + RBASIC(obj)->klass = klass; + rb_singleton_class_attached(klass, obj); + + metasuper = RBASIC(rb_class_real(super))->klass; + /* metaclass of a superclass may be NULL at boot time */ + if (metasuper) { + RBASIC(klass)->klass = metasuper; + } + return klass; + } +} + +VALUE +rb_define_class_id(id, super) + ID id; + VALUE super; +{ + VALUE klass; + + if (!super) super = rb_cObject; + klass = 
rb_class_new(super); + rb_make_metaclass(klass, RBASIC(super)->klass); + + return klass; +} + +VALUE +rb_class_inherited(super, klass) + VALUE super, klass; +{ + if (!super) super = rb_cObject; + return rb_funcall(super, rb_intern("inherited"), 1, klass); +} + +VALUE +rb_define_class(name, super) + const char *name; + VALUE super; +{ + VALUE klass; + ID id; + + id = rb_intern(name); + if (rb_const_defined(rb_cObject, id)) { + klass = rb_const_get(rb_cObject, id); + if (TYPE(klass) != T_CLASS) { + rb_raise(rb_eTypeError, "%s is not a class", name); + } + if (rb_class_real(RCLASS(klass)->super) != super) { + rb_name_error(id, "%s is already defined", name); + } + return klass; + } + if (!super) { + rb_warn("no super class for `%s', Object assumed", name); + } + klass = rb_define_class_id(id, super); + st_add_direct(rb_class_tbl, id, klass); + rb_name_class(klass, id); + rb_const_set(rb_cObject, id, klass); + rb_class_inherited(super, klass); + + return klass; +} + +VALUE +rb_define_class_under(outer, name, super) + VALUE outer; + const char *name; + VALUE super; +{ + VALUE klass; + ID id; + + id = rb_intern(name); + if (rb_const_defined_at(outer, id)) { + klass = rb_const_get_at(outer, id); + if (TYPE(klass) != T_CLASS) { + rb_raise(rb_eTypeError, "%s is not a class", name); + } + if (rb_class_real(RCLASS(klass)->super) != super) { + rb_name_error(id, "%s is already defined", name); + } + return klass; + } + if (!super) { + rb_warn("no super class for `%s::%s', Object assumed", + rb_class2name(outer), name); + } + klass = rb_define_class_id(id, super); + rb_set_class_path(klass, outer, name); + rb_const_set(outer, id, klass); + rb_class_inherited(super, klass); + + return klass; +} + +VALUE +rb_module_new() +{ + NEWOBJ(mdl, struct RClass); + OBJSETUP(mdl, rb_cModule, T_MODULE); + + mdl->super = 0; + mdl->iv_tbl = 0; + mdl->m_tbl = 0; + mdl->m_tbl = st_init_numtable(); + + return (VALUE)mdl; +} + +VALUE +rb_define_module_id(id) + ID id; +{ + VALUE mdl; + + mdl = 
rb_module_new(); + rb_name_class(mdl, id); + + return mdl; +} + +VALUE +rb_define_module(name) + const char *name; +{ + VALUE module; + ID id; + + id = rb_intern(name); + if (rb_const_defined(rb_cObject, id)) { + module = rb_const_get(rb_cObject, id); + if (TYPE(module) == T_MODULE) + return module; + rb_raise(rb_eTypeError, "%s is not a module", rb_obj_classname(module)); + } + module = rb_define_module_id(id); + st_add_direct(rb_class_tbl, id, module); + rb_const_set(rb_cObject, id, module); + + return module; +} + +VALUE +rb_define_module_under(outer, name) + VALUE outer; + const char *name; +{ + VALUE module; + ID id; + + id = rb_intern(name); + if (rb_const_defined_at(outer, id)) { + module = rb_const_get_at(outer, id); + if (TYPE(module) == T_MODULE) + return module; + rb_raise(rb_eTypeError, "%s::%s is not a module", + rb_class2name(outer), rb_obj_classname(module)); + } + module = rb_define_module_id(id); + rb_const_set(outer, id, module); + rb_set_class_path(module, outer, name); + + return module; +} + +static VALUE +include_class_new(module, super) + VALUE module, super; +{ + NEWOBJ(klass, struct RClass); + OBJSETUP(klass, rb_cClass, T_ICLASS); + + if (BUILTIN_TYPE(module) == T_ICLASS) { + module = RBASIC(module)->klass; + } + if (!RCLASS(module)->iv_tbl) { + RCLASS(module)->iv_tbl = st_init_numtable(); + } + klass->iv_tbl = RCLASS(module)->iv_tbl; + klass->m_tbl = RCLASS(module)->m_tbl; + klass->super = super; + if (TYPE(module) == T_ICLASS) { + RBASIC(klass)->klass = RBASIC(module)->klass; + } + else { + RBASIC(klass)->klass = module; + } + OBJ_INFECT(klass, module); + OBJ_INFECT(klass, super); + + return (VALUE)klass; +} + +void +rb_include_module(klass, module) + VALUE klass, module; +{ + VALUE p, c; + int changed = 0; + + rb_frozen_class_p(klass); + if (!OBJ_TAINTED(klass)) { + rb_secure(4); + } + + if (NIL_P(module)) return; + if (klass == module) return; + + if (TYPE(module) != T_MODULE) { + Check_Type(module, T_MODULE); + } + + OBJ_INFECT(klass, 
module); + c = klass; + while (module) { + int superclass_seen = Qfalse; + + if (RCLASS(klass)->m_tbl == RCLASS(module)->m_tbl) + rb_raise(rb_eArgError, "cyclic include detected"); + /* ignore if the module included already in superclasses */ + for (p = RCLASS(klass)->super; p; p = RCLASS(p)->super) { + switch (BUILTIN_TYPE(p)) { + case T_ICLASS: + if (RCLASS(p)->m_tbl == RCLASS(module)->m_tbl) { + if (!superclass_seen) { + c = p; /* move insertion point */ + } + goto skip; + } + break; + case T_CLASS: + superclass_seen = Qtrue; + break; + } + } + c = RCLASS(c)->super = include_class_new(module, RCLASS(c)->super); + changed = 1; + skip: + module = RCLASS(module)->super; + } + if (changed) rb_clear_cache(); +} + +/* + * call-seq: + * mod.included_modules -> array + * + * Returns the list of modules included in <i>mod</i>. + * + * module Mixin + * end + * + * module Outer + * include Mixin + * end + * + * Mixin.included_modules #=> [] + * Outer.included_modules #=> [Mixin] + */ + +VALUE +rb_mod_included_modules(mod) + VALUE mod; +{ + VALUE ary = rb_ary_new(); + VALUE p; + + for (p = RCLASS(mod)->super; p; p = RCLASS(p)->super) { + if (BUILTIN_TYPE(p) == T_ICLASS) { + rb_ary_push(ary, RBASIC(p)->klass); + } + } + return ary; +} + +/* + * call-seq: + * mod.include?(module) => true or false + * + * Returns <code>true</code> if <i>module</i> is included in + * <i>mod</i> or one of <i>mod</i>'s ancestors. 
+ * + * module A + * end + * class B + * include A + * end + * class C < B + * end + * B.include?(A) #=> true + * C.include?(A) #=> true + * A.include?(A) #=> false + */ + +VALUE +rb_mod_include_p(mod, mod2) + VALUE mod; + VALUE mod2; +{ + VALUE p; + + Check_Type(mod2, T_MODULE); + for (p = RCLASS(mod)->super; p; p = RCLASS(p)->super) { + if (BUILTIN_TYPE(p) == T_ICLASS) { + if (RBASIC(p)->klass == mod2) return Qtrue; + } + } + return Qfalse; +} + +/* + * call-seq: + * mod.ancestors -> array + * + * Returns a list of modules included in <i>mod</i> (including + * <i>mod</i> itself). + * + * module Mod + * include Math + * include Comparable + * end + * + * Mod.ancestors #=> [Mod, Comparable, Math] + * Math.ancestors #=> [Math] + */ + +VALUE +rb_mod_ancestors(mod) + VALUE mod; +{ + VALUE p, ary = rb_ary_new(); + + for (p = mod; p; p = RCLASS(p)->super) { + if (FL_TEST(p, FL_SINGLETON)) + continue; + if (BUILTIN_TYPE(p) == T_ICLASS) { + rb_ary_push(ary, RBASIC(p)->klass); + } + else { + rb_ary_push(ary, p); + } + } + return ary; +} + +#define VISI(x) ((x)&NOEX_MASK) +#define VISI_CHECK(x,f) (VISI(x) == (f)) + +static int +ins_methods_push(name, type, ary, visi) + ID name; + long type; + VALUE ary; + long visi; +{ + if (type == -1) return ST_CONTINUE; + switch (visi) { + case NOEX_PRIVATE: + case NOEX_PROTECTED: + case NOEX_PUBLIC: + visi = (type == visi); + break; + default: + visi = (type != NOEX_PRIVATE); + break; + } + if (visi) { + rb_ary_push(ary, rb_str_new2(rb_id2name(name))); + } + return ST_CONTINUE; +} + +static int +ins_methods_i(name, type, ary) + ID name; + long type; + VALUE ary; +{ + return ins_methods_push(name, type, ary, -1); /* everything but private */ +} + +static int +ins_methods_prot_i(name, type, ary) + ID name; + long type; + VALUE ary; +{ + return ins_methods_push(name, type, ary, NOEX_PROTECTED); +} + +static int +ins_methods_priv_i(name, type, ary) + ID name; + long type; + VALUE ary; +{ + return ins_methods_push(name, type, ary, 
NOEX_PRIVATE); +} + +static int +ins_methods_pub_i(name, type, ary) + ID name; + long type; + VALUE ary; +{ + return ins_methods_push(name, type, ary, NOEX_PUBLIC); +} + +static int +method_entry(key, body, list) + ID key; + NODE *body; + st_table *list; +{ + long type; + + if (key == ID_ALLOCATOR) return ST_CONTINUE; + if (!st_lookup(list, key, 0)) { + if (!body->nd_body) type = -1; /* none */ + else type = VISI(body->nd_noex); + st_add_direct(list, key, type); + } + return ST_CONTINUE; +} + +static VALUE +class_instance_method_list(argc, argv, mod, func) + int argc; + VALUE *argv; + VALUE mod; + int (*func) _((ID, long, VALUE)); +{ + VALUE ary; + int recur; + st_table *list; + + if (argc == 0) { + recur = Qtrue; + } + else { + VALUE r; + rb_scan_args(argc, argv, "01", &r); + recur = RTEST(r); + } + + list = st_init_numtable(); + for (; mod; mod = RCLASS(mod)->super) { + st_foreach(RCLASS(mod)->m_tbl, method_entry, (st_data_t)list); + if (BUILTIN_TYPE(mod) == T_ICLASS) continue; + if (FL_TEST(mod, FL_SINGLETON)) continue; + if (!recur) break; + } + ary = rb_ary_new(); + st_foreach(list, func, ary); + st_free_table(list); + + return ary; +} + +/* + * call-seq: + * mod.instance_methods(include_super=true) => array + * + * Returns an array containing the names of public instance methods in + * the receiver. For a module, these are the public methods; for a + * class, they are the instance (not singleton) methods. With no + * argument, or with an argument that is <code>false</code>, the + * instance methods in <i>mod</i> are returned, otherwise the methods + * in <i>mod</i> and <i>mod</i>'s superclasses are returned. 
+ * + * module A + * def method1() end + * end + * class B + * def method2() end + * end + * class C < B + * def method3() end + * end + * + * A.instance_methods #=> ["method1"] + * B.instance_methods(false) #=> ["method2"] + * C.instance_methods(false) #=> ["method3"] + * C.instance_methods(true).length #=> 43 + */ + +VALUE +rb_class_instance_methods(argc, argv, mod) + int argc; + VALUE *argv; + VALUE mod; +{ + return class_instance_method_list(argc, argv, mod, ins_methods_i); +} + +/* + * call-seq: + * mod.protected_instance_methods(include_super=true) => array + * + * Returns a list of the protected instance methods defined in + * <i>mod</i>. If the optional parameter is not <code>false</code>, the + * methods of any ancestors are included. + */ + +VALUE +rb_class_protected_instance_methods(argc, argv, mod) + int argc; + VALUE *argv; + VALUE mod; +{ + return class_instance_method_list(argc, argv, mod, ins_methods_prot_i); +} + +/* + * call-seq: + * mod.private_instance_methods(include_super=true) => array + * + * Returns a list of the private instance methods defined in + * <i>mod</i>. If the optional parameter is not <code>false</code>, the + * methods of any ancestors are included. + * + * module Mod + * def method1() end + * private :method1 + * def method2() end + * end + * Mod.instance_methods #=> ["method2"] + * Mod.private_instance_methods #=> ["method1"] + */ + +VALUE +rb_class_private_instance_methods(argc, argv, mod) + int argc; + VALUE *argv; + VALUE mod; +{ + return class_instance_method_list(argc, argv, mod, ins_methods_priv_i); +} + +/* + * call-seq: + * mod.public_instance_methods(include_super=true) => array + * + * Returns a list of the public instance methods defined in <i>mod</i>. + * If the optional parameter is not <code>false</code>, the methods of + * any ancestors are included. 
+ */ + +VALUE +rb_class_public_instance_methods(argc, argv, mod) + int argc; + VALUE *argv; + VALUE mod; +{ + return class_instance_method_list(argc, argv, mod, ins_methods_pub_i); +} + +/* + * call-seq: + * obj.singleton_methods(all=true) => array + * + * Returns an array of the names of singleton methods for <i>obj</i>. + * If the optional <i>all</i> parameter is true, the list will include + * methods in modules included in <i>obj</i>. + * + * module Other + * def three() end + * end + * + * class Single + * def Single.four() end + * end + * + * a = Single.new + * + * def a.one() + * end + * + * class << a + * include Other + * def two() + * end + * end + * + * Single.singleton_methods #=> ["four"] + * a.singleton_methods(false) #=> ["two", "one"] + * a.singleton_methods #=> ["two", "one", "three"] + */ + +VALUE +rb_obj_singleton_methods(argc, argv, obj) + int argc; + VALUE *argv; + VALUE obj; +{ + VALUE recur, ary, klass; + st_table *list; + + rb_scan_args(argc, argv, "01", &recur); + if (argc == 0) { + recur = Qtrue; + } + klass = CLASS_OF(obj); + list = st_init_numtable(); + if (klass && FL_TEST(klass, FL_SINGLETON)) { + st_foreach(RCLASS(klass)->m_tbl, method_entry, (st_data_t)list); + klass = RCLASS(klass)->super; + } + if (RTEST(recur)) { + while (klass && (FL_TEST(klass, FL_SINGLETON) || TYPE(klass) == T_ICLASS)) { + st_foreach(RCLASS(klass)->m_tbl, method_entry, (st_data_t)list); + klass = RCLASS(klass)->super; + } + } + ary = rb_ary_new(); + st_foreach(list, ins_methods_i, ary); + st_free_table(list); + + return ary; +} + +void +rb_define_method_id(klass, name, func, argc) + VALUE klass; + ID name; + VALUE (*func)(); + int argc; +{ + rb_add_method(klass, name, NEW_CFUNC(func,argc), NOEX_PUBLIC); +} + +void +rb_define_method(klass, name, func, argc) + VALUE klass; + const char *name; + VALUE (*func)(); + int argc; +{ + rb_add_method(klass, rb_intern(name), NEW_CFUNC(func, argc), NOEX_PUBLIC); +} + +void +rb_define_protected_method(klass, name, func, 
argc) + VALUE klass; + const char *name; + VALUE (*func)(); + int argc; +{ + rb_add_method(klass, rb_intern(name), NEW_CFUNC(func, argc), NOEX_PROTECTED); +} + +void +rb_define_private_method(klass, name, func, argc) + VALUE klass; + const char *name; + VALUE (*func)(); + int argc; +{ + rb_add_method(klass, rb_intern(name), NEW_CFUNC(func, argc), NOEX_PRIVATE); +} + +void +rb_undef_method(klass, name) + VALUE klass; + const char *name; +{ + rb_add_method(klass, rb_intern(name), 0, NOEX_UNDEF); +} + +#define SPECIAL_SINGLETON(x,c) do {\ + if (obj == (x)) {\ + return c;\ + }\ +} while (0) + +VALUE +rb_singleton_class(obj) + VALUE obj; +{ + VALUE klass; + + if (FIXNUM_P(obj) || SYMBOL_P(obj)) { + rb_raise(rb_eTypeError, "can't define singleton"); + } + if (rb_special_const_p(obj)) { + SPECIAL_SINGLETON(Qnil, rb_cNilClass); + SPECIAL_SINGLETON(Qfalse, rb_cFalseClass); + SPECIAL_SINGLETON(Qtrue, rb_cTrueClass); + rb_bug("unknown immediate %ld", obj); + } + + DEFER_INTS; + if (FL_TEST(RBASIC(obj)->klass, FL_SINGLETON) && + rb_iv_get(RBASIC(obj)->klass, "__attached__") == obj) { + klass = RBASIC(obj)->klass; + } + else { + klass = rb_make_metaclass(obj, RBASIC(obj)->klass); + } + if (OBJ_TAINTED(obj)) { + OBJ_TAINT(klass); + } + else { + FL_UNSET(klass, FL_TAINT); + } + if (OBJ_FROZEN(obj)) OBJ_FREEZE(klass); + ALLOW_INTS; + + return klass; +} + +void +rb_define_singleton_method(obj, name, func, argc) + VALUE obj; + const char *name; + VALUE (*func)(); + int argc; +{ + rb_define_method(rb_singleton_class(obj), name, func, argc); +} + +void +rb_define_module_function(module, name, func, argc) + VALUE module; + const char *name; + VALUE (*func)(); + int argc; +{ + rb_define_private_method(module, name, func, argc); + rb_define_singleton_method(module, name, func, argc); +} + +void +rb_define_global_function(name, func, argc) + const char *name; + VALUE (*func)(); + int argc; +{ + rb_define_module_function(rb_mKernel, name, func, argc); +} + +void +rb_define_alias(klass, 
name1, name2) + VALUE klass; + const char *name1, *name2; +{ + rb_alias(klass, rb_intern(name1), rb_intern(name2)); +} + +void +rb_define_attr(klass, name, read, write) + VALUE klass; + const char *name; + int read, write; +{ + rb_attr(klass, rb_intern(name), read, write, Qfalse); +} + +#ifdef HAVE_STDARG_PROTOTYPES +#include <stdarg.h> +#define va_init_list(a,b) va_start(a,b) +#else +#include <varargs.h> +#define va_init_list(a,b) va_start(a) +#endif + +int +#ifdef HAVE_STDARG_PROTOTYPES +rb_scan_args(int argc, const VALUE *argv, const char *fmt, ...) +#else +rb_scan_args(argc, argv, fmt, va_alist) + int argc; + const VALUE *argv; + const char *fmt; + va_dcl +#endif +{ + int n, i = 0; + const char *p = fmt; + VALUE *var; + va_list vargs; + + va_init_list(vargs, fmt); + + if (*p == '*') goto rest_arg; + + if (ISDIGIT(*p)) { + n = *p - '0'; + if (n > argc) + rb_raise(rb_eArgError, "wrong number of arguments (%d for %d)", argc, n); + for (i=0; i<n; i++) { + var = va_arg(vargs, VALUE*); + if (var) *var = argv[i]; + } + p++; + } + else { + goto error; + } + + if (ISDIGIT(*p)) { + n = i + *p - '0'; + for (; i<n; i++) { + var = va_arg(vargs, VALUE*); + if (argc > i) { + if (var) *var = argv[i]; + } + else { + if (var) *var = Qnil; + } + } + p++; + } + + if(*p == '*') { + rest_arg: + var = va_arg(vargs, VALUE*); + if (argc > i) { + if (var) *var = rb_ary_new4(argc-i, argv+i); + i = argc; + } + else { + if (var) *var = rb_ary_new(); + } + p++; + } + + if (*p == '&') { + var = va_arg(vargs, VALUE*); + if (rb_block_given_p()) { + *var = rb_block_proc(); + } + else { + *var = Qnil; + } + p++; + } + va_end(vargs); + + if (*p != '\0') { + goto error; + } + + if (argc > i) { + rb_raise(rb_eArgError, "wrong number of arguments (%d for %d)", argc, i); + } + + return argc; + + error: + rb_fatal("bad scan arg format: %s", fmt); + return 0; +} +/********************************************************************** + + compar.c - + + $Author: michal $ + $Date: 2004/06/22 06:30:41 $ + 
created at: Thu Aug 26 14:39:48 JST 1993 + + Copyright (C) 1993-2003 Yukihiro Matsumoto + +**********************************************************************/ + +#include "ruby.h" + +VALUE rb_mComparable; + +static ID cmp; + +int +rb_cmpint(val, a, b) + VALUE val, a, b; +{ + if (NIL_P(val)) { + rb_cmperr(a, b); + } + if (FIXNUM_P(val)) return FIX2INT(val); + if (TYPE(val) == T_BIGNUM) { + if (RBIGNUM(val)->sign) return 1; + return -1; + } + if (RTEST(rb_funcall(val, '>', 1, INT2FIX(0)))) return 1; + if (RTEST(rb_funcall(val, '<', 1, INT2FIX(0)))) return -1; + return 0; +} + +void +rb_cmperr(x, y) + VALUE x, y; +{ + const char *classname; + + if (SPECIAL_CONST_P(y)) { + y = rb_inspect(y); + classname = StringValuePtr(y); + } + else { + classname = rb_obj_classname(y); + } + rb_raise(rb_eArgError, "comparison of %s with %s failed", + rb_obj_classname(x), classname); +} + +static VALUE +cmp_eq(a) + VALUE *a; +{ + VALUE c = rb_funcall(a[0], cmp, 1, a[1]); + + if (NIL_P(c)) return Qnil; + if (rb_cmpint(c, a[0], a[1]) == 0) return Qtrue; + return Qfalse; +} + +static VALUE +cmp_failed() +{ + return Qnil; +} + +/* + * call-seq: + * obj == other => true or false + * + * Compares two objects based on the receiver's <code><=></code> + * method, returning true if it returns 0. Also returns true if + * _obj_ and _other_ are the same object. + */ + +static VALUE +cmp_equal(x, y) + VALUE x, y; +{ + VALUE a[2]; + + if (x == y) return Qtrue; + + a[0] = x; a[1] = y; + return rb_rescue(cmp_eq, (VALUE)a, cmp_failed, 0); +} + +/* + * call-seq: + * obj > other => true or false + * + * Compares two objects based on the receiver's <code><=></code> + * method, returning true if it returns 1. 
+ */ + +static VALUE +cmp_gt(x, y) + VALUE x, y; +{ + VALUE c = rb_funcall(x, cmp, 1, y); + + if (rb_cmpint(c, x, y) > 0) return Qtrue; + return Qfalse; +} + +/* + * call-seq: + * obj >= other => true or false + * + * Compares two objects based on the receiver's <code><=></code> + * method, returning true if it returns 0 or 1. + */ + +static VALUE +cmp_ge(x, y) + VALUE x, y; +{ + VALUE c = rb_funcall(x, cmp, 1, y); + + if (rb_cmpint(c, x, y) >= 0) return Qtrue; + return Qfalse; +} + +/* + * call-seq: + * obj < other => true or false + * + * Compares two objects based on the receiver's <code><=></code> + * method, returning true if it returns -1. + */ + +static VALUE +cmp_lt(x, y) + VALUE x, y; +{ + VALUE c = rb_funcall(x, cmp, 1, y); + + if (rb_cmpint(c, x, y) < 0) return Qtrue; + return Qfalse; +} + + +/* + * call-seq: + * obj <= other => true or false + * + * Compares two objects based on the receiver's <code><=></code> + * method, returning true if it returns -1 or 0. + */ + +static VALUE +cmp_le(x, y) + VALUE x, y; +{ + VALUE c = rb_funcall(x, cmp, 1, y); + + if (rb_cmpint(c, x, y) <= 0) return Qtrue; + return Qfalse; +} + +/* + * call-seq: + * obj.between?(min, max) => true or false + * + * Returns <code>false</code> if <i>obj</i> <code><=></code> + * <i>min</i> is less than zero or if <i>anObject</i> <code><=></code> + * <i>max</i> is greater than zero, <code>true</code> otherwise. + * + * 3.between?(1, 5) #=> true + * 6.between?(1, 5) #=> false + * 'cat'.between?('ant', 'dog') #=> true + * 'gnu'.between?('ant', 'dog') #=> false + * + */ + +static VALUE +cmp_between(x, min, max) + VALUE x, min, max; +{ + if (RTEST(cmp_lt(x, min))) return Qfalse; + if (RTEST(cmp_gt(x, max))) return Qfalse; + return Qtrue; +} + +/* + * The <code>Comparable</code> mixin is used by classes whose objects + * may be ordered. 
The class must define the <code><=></code> operator, + * which compares the receiver against another object, returning -1, 0, + * or +1 depending on whether the receiver is less than, equal to, or + * greater than the other object. <code>Comparable</code> uses + * <code><=></code> to implement the conventional comparison operators + * (<code><</code>, <code><=</code>, <code>==</code>, <code>>=</code>, + * and <code>></code>) and the method <code>between?</code>. + * + * class SizeMatters + * include Comparable + * attr :str + * def <=>(anOther) + * str.size <=> anOther.str.size + * end + * def initialize(str) + * @str = str + * end + * def inspect + * @str + * end + * end + * + * s1 = SizeMatters.new("Z") + * s2 = SizeMatters.new("YY") + * s3 = SizeMatters.new("XXX") + * s4 = SizeMatters.new("WWWW") + * s5 = SizeMatters.new("VVVVV") + * + * s1 < s2 #=> true + * s4.between?(s1, s3) #=> false + * s4.between?(s3, s5) #=> true + * [ s3, s2, s5, s4, s1 ].sort #=> [Z, YY, XXX, WWWW, VVVVV] + * + */ + +void +Init_Comparable() +{ + rb_mComparable = rb_define_module("Comparable"); + rb_define_method(rb_mComparable, "==", cmp_equal, 1); + rb_define_method(rb_mComparable, ">", cmp_gt, 1); + rb_define_method(rb_mComparable, ">=", cmp_ge, 1); + rb_define_method(rb_mComparable, "<", cmp_lt, 1); + rb_define_method(rb_mComparable, "<=", cmp_le, 1); + rb_define_method(rb_mComparable, "between?", cmp_between, 2); + + cmp = rb_intern("<=>"); +} +/********************************************************************** + + dir.c - + + $Author: eban $ + $Date: 2005/04/11 13:30:10 $ + created at: Wed Jan 5 09:51:01 JST 1994 + + Copyright (C) 1993-2003 Yukihiro Matsumoto + Copyright (C) 2000 Network Applied Communication Laboratory, Inc. 
+ Copyright (C) 2000 Information-technology Promotion Agency, Japan + +**********************************************************************/ + +#include "ruby.h" + +#include <sys/types.h> +#include <sys/stat.h> + +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif + +#if defined HAVE_DIRENT_H && !defined _WIN32 +# include <dirent.h> +# define NAMLEN(dirent) strlen((dirent)->d_name) +#elif defined HAVE_DIRECT_H && !defined _WIN32 +# include <direct.h> +# define NAMLEN(dirent) strlen((dirent)->d_name) +#else +# define dirent direct +# if !defined __NeXT__ +# define NAMLEN(dirent) (dirent)->d_namlen +# else +# /* On some versions of NextStep, d_namlen is always zero, so avoid it. */ +# define NAMLEN(dirent) strlen((dirent)->d_name) +# endif +# if HAVE_SYS_NDIR_H +# include <sys/ndir.h> +# endif +# if HAVE_SYS_DIR_H +# include <sys/dir.h> +# endif +# if HAVE_NDIR_H +# include <ndir.h> +# endif +# ifdef _WIN32 +# include "win32/dir.h" +# endif +#endif + +#include <errno.h> + +#ifndef HAVE_STDLIB_H +char *getenv(); +#endif + +#ifndef HAVE_STRING_H +char *strchr _((char*,char)); +#endif + +#include <ctype.h> + +#include "util.h" + +#if !defined HAVE_LSTAT && !defined lstat +#define lstat stat +#endif + +#define FNM_NOESCAPE 0x01 +#define FNM_PATHNAME 0x02 +#define FNM_DOTMATCH 0x04 +#define FNM_CASEFOLD 0x08 + +#define FNM_NOMATCH 1 +#define FNM_ERROR 2 + +#define downcase(c) (nocase && ISUPPER(c) ? tolower(c) : (c)) +#define compare(c1, c2) (((unsigned char)(c1)) - ((unsigned char)(c2))) + +/* caution: in case *p == '\0' + Next(p) == p + 1 in single byte environment + Next(p) == p in multi byte environment +*/ +#if defined(CharNext) +# define Next(p) CharNext(p) +#elif defined(DJGPP) +# define Next(p) ((p) + mblen(p, RUBY_MBCHAR_MAXSIZE)) +#elif defined(__EMX__) +# define Next(p) ((p) + emx_mblen(p)) +static inline int +emx_mblen(p) + const char *p; +{ + int n = mblen(p, RUBY_MBCHAR_MAXSIZE); + return (n < 0) ? 
1 : n; +} +#endif + +#ifndef Next /* single byte environment */ +# define Next(p) ((p) + 1) +# define Inc(p) (++(p)) +# define Compare(p1, p2) (compare(downcase(*(p1)), downcase(*(p2)))) +#else /* multi byte environment */ +# define Inc(p) ((p) = Next(p)) +# define Compare(p1, p2) (CompareImpl(p1, p2, nocase)) +static int +CompareImpl(p1, p2, nocase) + const char *p1; + const char *p2; + int nocase; +{ + const int len1 = Next(p1) - p1; + const int len2 = Next(p2) - p2; +#ifdef _WIN32 + char buf1[10], buf2[10]; /* large enough? */ +#endif + + if (len1 < 0 || len2 < 0) { + rb_fatal("CompareImpl: negative len"); + } + + if (len1 == 0) return len2; + if (len2 == 0) return -len1; + +#ifdef _WIN32 + if (nocase) { + if (len1 > 1) { + if (len1 >= sizeof(buf1)) { + rb_fatal("CompareImpl: too large len"); + } + memcpy(buf1, p1, len1); + buf1[len1] = '\0'; + CharLower(buf1); + p1 = buf1; /* trick */ + } + if (len2 > 1) { + if (len2 >= sizeof(buf2)) { + rb_fatal("CompareImpl: too large len"); + } + memcpy(buf2, p2, len2); + buf2[len2] = '\0'; + CharLower(buf2); + p2 = buf2; /* trick */ + } + } +#endif + if (len1 == 1) + if (len2 == 1) + return compare(downcase(*p1), downcase(*p2)); + else { + const int ret = compare(downcase(*p1), *p2); + return ret ? ret : -1; + } + else + if (len2 == 1) { + const int ret = compare(*p1, downcase(*p2)); + return ret ? ret : 1; + } + else { + const int ret = memcmp(p1, p2, len1 < len2 ? len1 : len2); + return ret ? ret : len1 - len2; + } +} +#endif /* environment */ + +static char * +bracket(p, s, flags) + const char *p; /* pattern (next to '[') */ + const char *s; /* string */ + int flags; +{ + const int nocase = flags & FNM_CASEFOLD; + const int escape = !(flags & FNM_NOESCAPE); + + int ok = 0, not = 0; + + if (*p == '!' 
|| *p == '^') { + not = 1; + p++; + } + + while (*p != ']') { + const char *t1 = p; + if (escape && *t1 == '\\') + t1++; + if (!*t1) + return NULL; + p = Next(t1); + if (p[0] == '-' && p[1] != ']') { + const char *t2 = p + 1; + if (escape && *t2 == '\\') + t2++; + if (!*t2) + return NULL; + p = Next(t2); + if (!ok && Compare(t1, s) <= 0 && Compare(s, t2) <= 0) + ok = 1; + } + else + if (!ok && Compare(t1, s) == 0) + ok = 1; + } + + return ok == not ? NULL : (char *)p + 1; +} + +/* If FNM_PATHNAME is set, only path element will be matched. (upto '/' or '\0') + Otherwise, entire string will be matched. + End marker itself won't be compared. + And if function succeeds, *pcur reaches end marker. +*/ +#define UNESCAPE(p) (escape && *(p) == '\\' ? (p) + 1 : (p)) +#define ISEND(p) (!*(p) || (pathname && *(p) == '/')) +#define RETURN(val) return *pcur = p, *scur = s, (val); + +static int +fnmatch_helper(pcur, scur, flags) + const char **pcur; /* pattern */ + const char **scur; /* string */ + int flags; +{ + const int period = !(flags & FNM_DOTMATCH); + const int pathname = flags & FNM_PATHNAME; + const int escape = !(flags & FNM_NOESCAPE); + const int nocase = flags & FNM_CASEFOLD; + + const char *ptmp = 0; + const char *stmp = 0; + + const char *p = *pcur; + const char *s = *scur; + + if (period && *s == '.' && *UNESCAPE(p) != '.') /* leading period */ + RETURN(FNM_NOMATCH); + + while (1) { + switch (*p) { + case '*': + do { p++; } while (*p == '*'); + if (ISEND(UNESCAPE(p))) { + p = UNESCAPE(p); + RETURN(0); + } + if (ISEND(s)) + RETURN(FNM_NOMATCH); + ptmp = p; + stmp = s; + continue; + + case '?': + if (ISEND(s)) + RETURN(FNM_NOMATCH); + p++; + Inc(s); + continue; + + case '[': { + const char *t; + if (ISEND(s)) + RETURN(FNM_NOMATCH); + if (t = bracket(p + 1, s, flags)) { + p = t; + Inc(s); + continue; + } + goto failed; + } + } + + /* ordinary */ + p = UNESCAPE(p); + if (ISEND(s)) + RETURN(ISEND(p) ? 
0 : FNM_NOMATCH); + if (ISEND(p)) + goto failed; + if (Compare(p, s) != 0) + goto failed; + Inc(p); + Inc(s); + continue; + + failed: /* try next '*' position */ + if (ptmp && stmp) { + p = ptmp; + Inc(stmp); /* !ISEND(*stmp) */ + s = stmp; + continue; + } + RETURN(FNM_NOMATCH); + } +} + +static int +fnmatch(p, s, flags) + const char *p; /* pattern */ + const char *s; /* string */ + int flags; +{ + const int period = !(flags & FNM_DOTMATCH); + const int pathname = flags & FNM_PATHNAME; + + const char *ptmp = 0; + const char *stmp = 0; + + if (pathname) { + while (1) { + if (p[0] == '*' && p[1] == '*' && p[2] == '/') { + do { p += 3; } while (p[0] == '*' && p[1] == '*' && p[2] == '/'); + ptmp = p; + stmp = s; + } + if (fnmatch_helper(&p, &s, flags) == 0) { + while (*s && *s != '/') Inc(s); + if (*p && *s) { + p++; + s++; + continue; + } + if (!*p && !*s) + return 0; + } + /* failed : try next recursion */ + if (ptmp && stmp && !(period && *stmp == '.')) { + while (*stmp && *stmp != '/') Inc(stmp); + if (*stmp) { + p = ptmp; + stmp++; + s = stmp; + continue; + } + } + return FNM_NOMATCH; + } + } + else + return fnmatch_helper(&p, &s, flags); +} + +VALUE rb_cDir; + +struct dir_data { + DIR *dir; + char *path; +}; + +static void +free_dir(dir) + struct dir_data *dir; +{ + if (dir) { + if (dir->dir) closedir(dir->dir); + if (dir->path) free(dir->path); + } + free(dir); +} + +static VALUE dir_close _((VALUE)); + +static VALUE dir_s_alloc _((VALUE)); +static VALUE +dir_s_alloc(klass) + VALUE klass; +{ + struct dir_data *dirp; + VALUE obj = Data_Make_Struct(klass, struct dir_data, 0, free_dir, dirp); + + dirp->dir = NULL; + dirp->path = NULL; + + return obj; +} + +/* + * call-seq: + * Dir.new( string ) -> aDir + * + * Returns a new directory object for the named directory. 
+ */ +static VALUE +dir_initialize(dir, dirname) + VALUE dir, dirname; +{ + struct dir_data *dp; + + FilePathValue(dirname); + Data_Get_Struct(dir, struct dir_data, dp); + if (dp->dir) closedir(dp->dir); + if (dp->path) free(dp->path); + dp->dir = NULL; + dp->path = NULL; + dp->dir = opendir(RSTRING(dirname)->ptr); + if (dp->dir == NULL) { + if (errno == EMFILE || errno == ENFILE) { + rb_gc(); + dp->dir = opendir(RSTRING(dirname)->ptr); + } + if (dp->dir == NULL) { + rb_sys_fail(RSTRING(dirname)->ptr); + } + } + dp->path = strdup(RSTRING(dirname)->ptr); + + return dir; +} + +/* + * call-seq: + * Dir.open( string ) => aDir + * Dir.open( string ) {| aDir | block } => anObject + * + * With no block, <code>open</code> is a synonym for + * <code>Dir::new</code>. If a block is present, it is passed + * <i>aDir</i> as a parameter. The directory is closed at the end of + * the block, and <code>Dir::open</code> returns the value of the + * block. + */ +static VALUE +dir_s_open(klass, dirname) + VALUE klass, dirname; +{ + struct dir_data *dp; + VALUE dir = Data_Make_Struct(klass, struct dir_data, 0, free_dir, dp); + + dir_initialize(dir, dirname); + if (rb_block_given_p()) { + return rb_ensure(rb_yield, dir, dir_close, dir); + } + + return dir; +} + +static void +dir_closed() +{ + rb_raise(rb_eIOError, "closed directory"); +} + +#define GetDIR(obj, dirp) do {\ + Data_Get_Struct(obj, struct dir_data, dirp);\ + if (dirp->dir == NULL) dir_closed();\ +} while (0) + +/* + * call-seq: + * dir.inspect => string + * + * Return a string describing this Dir object. 
+ */ +static VALUE +dir_inspect(dir) + VALUE dir; +{ + struct dir_data *dirp; + + GetDIR(dir, dirp); + if (dirp->path) { + char *c = rb_obj_classname(dir); + int len = strlen(c) + strlen(dirp->path) + 4; + VALUE s = rb_str_new(0, len); + snprintf(RSTRING(s)->ptr, len+1, "#<%s:%s>", c, dirp->path); + return s; + } + return rb_funcall(dir, rb_intern("to_s"), 0, 0); +} + +/* + * call-seq: + * dir.path => string or nil + * + * Returns the path parameter passed to <em>dir</em>'s constructor. + * + * d = Dir.new("..") + * d.path #=> ".." + */ +static VALUE +dir_path(dir) + VALUE dir; +{ + struct dir_data *dirp; + + GetDIR(dir, dirp); + if (!dirp->path) return Qnil; + return rb_str_new2(dirp->path); +} + +/* + * call-seq: + * dir.read => string or nil + * + * Reads the next entry from <em>dir</em> and returns it as a string. + * Returns <code>nil</code> at the end of the stream. + * + * d = Dir.new("testdir") + * d.read #=> "." + * d.read #=> ".." + * d.read #=> "config.h" + */ +static VALUE +dir_read(dir) + VALUE dir; +{ + struct dir_data *dirp; + struct dirent *dp; + + GetDIR(dir, dirp); + errno = 0; + dp = readdir(dirp->dir); + if (dp) { + return rb_tainted_str_new(dp->d_name, NAMLEN(dp)); + } + else if (errno == 0) { /* end of stream */ + return Qnil; + } + else { + rb_sys_fail(0); + } + return Qnil; /* not reached */ +} + +/* + * call-seq: + * dir.each { |filename| block } => dir + * + * Calls the block once for each entry in this directory, passing the + * filename of each entry as a parameter to the block. + * + * d = Dir.new("testdir") + * d.each {|x| puts "Got #{x}" } + * + * <em>produces:</em> + * + * Got . + * Got .. 
+ * Got config.h + * Got main.rb + */ +static VALUE +dir_each(dir) + VALUE dir; +{ + struct dir_data *dirp; + struct dirent *dp; + + GetDIR(dir, dirp); + for (dp = readdir(dirp->dir); dp != NULL; dp = readdir(dirp->dir)) { + rb_yield(rb_tainted_str_new(dp->d_name, NAMLEN(dp))); + if (dirp->dir == NULL) dir_closed(); + } + return dir; +} + +/* + * call-seq: + * dir.pos => integer + * dir.tell => integer + * + * Returns the current position in <em>dir</em>. See also + * <code>Dir#seek</code>. + * + * d = Dir.new("testdir") + * d.tell #=> 0 + * d.read #=> "." + * d.tell #=> 12 + */ +static VALUE +dir_tell(dir) + VALUE dir; +{ +#ifdef HAVE_TELLDIR + struct dir_data *dirp; + long pos; + + GetDIR(dir, dirp); + pos = telldir(dirp->dir); + return rb_int2inum(pos); +#else + rb_notimplement(); +#endif +} + +/* + * call-seq: + * dir.seek( integer ) => dir + * + * Seeks to a particular location in <em>dir</em>. <i>integer</i> + * must be a value returned by <code>Dir#tell</code>. + * + * d = Dir.new("testdir") #=> #<Dir:0x401b3c40> + * d.read #=> "." + * i = d.tell #=> 12 + * d.read #=> ".." + * d.seek(i) #=> #<Dir:0x401b3c40> + * d.read #=> ".." + */ +static VALUE +dir_seek(dir, pos) + VALUE dir, pos; +{ + struct dir_data *dirp; + off_t p = NUM2OFFT(pos); + + GetDIR(dir, dirp); +#ifdef HAVE_SEEKDIR + seekdir(dirp->dir, p); + return dir; +#else + rb_notimplement(); +#endif +} + +/* + * call-seq: + * dir.pos( integer ) => integer + * + * Synonym for <code>Dir#seek</code>, but returns the position + * parameter. + * + * d = Dir.new("testdir") #=> #<Dir:0x401b3c40> + * d.read #=> "." + * i = d.pos #=> 12 + * d.read #=> ".." + * d.pos = i #=> 12 + * d.read #=> ".." + */ +static VALUE +dir_set_pos(dir, pos) + VALUE dir, pos; +{ + dir_seek(dir, pos); + return pos; +} + +/* + * call-seq: + * dir.rewind => dir + * + * Repositions <em>dir</em> to the first entry. + * + * d = Dir.new("testdir") + * d.read #=> "." + * d.rewind #=> #<Dir:0x401b3fb0> + * d.read #=> "." 
+ */ +static VALUE +dir_rewind(dir) + VALUE dir; +{ + struct dir_data *dirp; + + GetDIR(dir, dirp); + rewinddir(dirp->dir); + return dir; +} + +/* + * call-seq: + * dir.close => nil + * + * Closes the directory stream. Any further attempts to access + * <em>dir</em> will raise an <code>IOError</code>. + * + * d = Dir.new("testdir") + * d.close #=> nil + */ +static VALUE +dir_close(dir) + VALUE dir; +{ + struct dir_data *dirp; + + GetDIR(dir, dirp); + closedir(dirp->dir); + dirp->dir = NULL; + + return Qnil; +} + +static void +dir_chdir(path) + VALUE path; +{ + if (chdir(RSTRING(path)->ptr) < 0) + rb_sys_fail(RSTRING(path)->ptr); +} + +static int chdir_blocking = 0; +static VALUE chdir_thread = Qnil; + +struct chdir_data { + VALUE old_path, new_path; + int done; +}; + +static VALUE +chdir_yield(args) + struct chdir_data *args; +{ + dir_chdir(args->new_path); + args->done = Qtrue; + chdir_blocking++; + if (chdir_thread == Qnil) + chdir_thread = rb_thread_current(); + return rb_yield(args->new_path); +} + +static VALUE +chdir_restore(args) + struct chdir_data *args; +{ + if (args->done) { + chdir_blocking--; + if (chdir_blocking == 0) + chdir_thread = Qnil; + dir_chdir(args->old_path); + } + return Qnil; +} + +/* + * call-seq: + * Dir.chdir( [ string] ) => 0 + * Dir.chdir( [ string] ) {| path | block } => anObject + * + * Changes the current working directory of the process to the given + * string. When called without an argument, changes the directory to + * the value of the environment variable <code>HOME</code>, or + * <code>LOGDIR</code>. <code>SystemCallError</code> (probably + * <code>Errno::ENOENT</code>) if the target directory does not exist. + * + * If a block is given, it is passed the name of the new current + * directory, and the block is executed with that as the current + * directory. The original working directory is restored when the block + * exits. The return value of <code>chdir</code> is the value of the + * block. 
<code>chdir</code> blocks can be nested, but in a + * multi-threaded program an error will be raised if a thread attempts + * to open a <code>chdir</code> block while another thread has one + * open. + * + * Dir.chdir("/var/spool/mail") + * puts Dir.pwd + * Dir.chdir("/tmp") do + * puts Dir.pwd + * Dir.chdir("/usr") do + * puts Dir.pwd + * end + * puts Dir.pwd + * end + * puts Dir.pwd + * + * <em>produces:</em> + * + * /var/spool/mail + * /tmp + * /usr + * /tmp + * /var/spool/mail + */ +static VALUE +dir_s_chdir(argc, argv, obj) + int argc; + VALUE *argv; + VALUE obj; +{ + VALUE path = Qnil; + + rb_secure(2); + if (rb_scan_args(argc, argv, "01", &path) == 1) { + FilePathValue(path); + } + else { + const char *dist = getenv("HOME"); + if (!dist) { + dist = getenv("LOGDIR"); + if (!dist) rb_raise(rb_eArgError, "HOME/LOGDIR not set"); + } + path = rb_str_new2(dist); + } + + if (chdir_blocking > 0) { + if (!rb_block_given_p() || rb_thread_current() != chdir_thread) + rb_warn("conflicting chdir during another chdir block"); + } + + if (rb_block_given_p()) { + struct chdir_data args; + char *cwd = my_getcwd(); + + args.old_path = rb_tainted_str_new2(cwd); free(cwd); + args.new_path = path; + args.done = Qfalse; + return rb_ensure(chdir_yield, (VALUE)&args, chdir_restore, (VALUE)&args); + } + dir_chdir(path); + + return INT2FIX(0); +} + +/* + * call-seq: + * Dir.getwd => string + * Dir.pwd => string + * + * Returns the path to the current working directory of this process as + * a string. 
+ * + * Dir.chdir("/tmp") #=> 0 + * Dir.getwd #=> "/tmp" + */ +static VALUE +dir_s_getwd(dir) + VALUE dir; +{ + char *path; + VALUE cwd; + + rb_secure(4); + path = my_getcwd(); + cwd = rb_tainted_str_new2(path); + + free(path); + return cwd; +} + +static void check_dirname _((volatile VALUE *)); +static void +check_dirname(dir) + volatile VALUE *dir; +{ + char *path, *pend; + + rb_secure(2); + FilePathValue(*dir); + path = RSTRING(*dir)->ptr; + if (path && *(pend = rb_path_end(rb_path_skip_prefix(path)))) { + *dir = rb_str_new(path, pend - path); + } +} + +/* + * call-seq: + * Dir.chroot( string ) => 0 + * + * Changes this process's idea of the file system root. Only a + * privileged process may make this call. Not available on all + * platforms. On Unix systems, see <code>chroot(2)</code> for more + * information. + */ +static VALUE +dir_s_chroot(dir, path) + VALUE dir, path; +{ +#if defined(HAVE_CHROOT) && !defined(__CHECKER__) + check_dirname(&path); + + if (chroot(RSTRING(path)->ptr) == -1) + rb_sys_fail(RSTRING(path)->ptr); + + return INT2FIX(0); +#else + rb_notimplement(); + return Qnil; /* not reached */ +#endif +} + +/* + * call-seq: + * Dir.mkdir( string [, integer] ) => 0 + * + * Makes a new directory named by <i>string</i>, with permissions + * specified by the optional parameter <i>anInteger</i>. The + * permissions may be modified by the value of + * <code>File::umask</code>, and are ignored on NT. Raises a + * <code>SystemCallError</code> if the directory cannot be created. See + * also the discussion of permissions in the class documentation for + * <code>File</code>. 
+ * + */ +static VALUE +dir_s_mkdir(argc, argv, obj) + int argc; + VALUE *argv; + VALUE obj; +{ + VALUE path, vmode; + int mode; + + if (rb_scan_args(argc, argv, "11", &path, &vmode) == 2) { + mode = NUM2INT(vmode); + } + else { + mode = 0777; + } + + check_dirname(&path); + if (mkdir(RSTRING(path)->ptr, mode) == -1) + rb_sys_fail(RSTRING(path)->ptr); + + return INT2FIX(0); +} + +/* + * call-seq: + * Dir.delete( string ) => 0 + * Dir.rmdir( string ) => 0 + * Dir.unlink( string ) => 0 + * + * Deletes the named directory. Raises a subclass of + * <code>SystemCallError</code> if the directory isn't empty. + */ +static VALUE +dir_s_rmdir(obj, dir) + VALUE obj, dir; +{ + check_dirname(&dir); + if (rmdir(RSTRING(dir)->ptr) < 0) + rb_sys_fail(RSTRING(dir)->ptr); + + return INT2FIX(0); +} + +/* System call with warning */ +static int +do_stat(path, pst) + const char *path; + struct stat *pst; +{ + int ret = stat(path, pst); + if (ret < 0 && errno != ENOENT) + rb_sys_warning(path); + + return ret; +} + +static int +do_lstat(path, pst) + const char *path; + struct stat *pst; +{ + int ret = lstat(path, pst); + if (ret < 0 && errno != ENOENT) + rb_sys_warning(path); + + return ret; +} + +static DIR * +do_opendir(path) + const char *path; +{ + DIR *dirp = opendir(path); + if (dirp == NULL && errno != ENOENT && errno != ENOTDIR) + rb_sys_warning(path); + + return dirp; +} + +/* Return nonzero if S has any special globbing chars in it. */ +static int +has_magic(s, flags) + const char *s; + int flags; +{ + const int escape = !(flags & FNM_NOESCAPE); + + register const char *p = s; + register char c; + + while (c = *p++) { + switch (c) { + case '*': + case '?': + case '[': + return 1; + + case '\\': + if (escape && !(c = *p++)) + return 0; + continue; + } + + p = Next(p-1); + } + + return 0; +} + +/* Find separator in globbing pattern. 
*/ +static char * +find_dirsep(s, flags) + const char *s; + int flags; +{ + const int escape = !(flags & FNM_NOESCAPE); + + register const char *p = s; + register char c; + int open = 0; + + while (c = *p++) { + switch (c) { + case '[': + open = 1; + continue; + case ']': + open = 0; + continue; + + case '/': + if (!open) + return (char *)p-1; + continue; + + case '\\': + if (escape && !(c = *p++)) + return (char *)p-1; + continue; + } + + p = Next(p-1); + } + + return (char *)p-1; +} + +/* Remove escaping baskclashes */ +static void +remove_backslashes(p) + char *p; +{ + char *t = p; + char *s = p; + + while (*p) { + if (*p == '\\') { + if (t != s) + memmove(t, s, p - s); + t += p - s; + s = ++p; + if (!*p) break; + } + Inc(p); + } + + while (*p++); + + if (t != s) + memmove(t, s, p - s); /* move '\0' too */ +} + +/* Globing pattern */ +enum glob_pattern_type { PLAIN, MAGICAL, RECURSIVE, MATCH_ALL, MATCH_DIR }; + +struct glob_pattern { + char *str; + enum glob_pattern_type type; + struct glob_pattern *next; +}; + +static struct glob_pattern * +glob_make_pattern(p, flags) + const char *p; + int flags; +{ + struct glob_pattern *list, *tmp, **tail = &list; + int dirsep = 0; /* pattern is terminated with '/' */ + + while (*p) { + tmp = ALLOC(struct glob_pattern); + if (p[0] == '*' && p[1] == '*' && p[2] == '/') { + /* fold continuous RECURSIVEs (needed in glob_helper) */ + do { p += 3; } while (p[0] == '*' && p[1] == '*' && p[2] == '/'); + tmp->type = RECURSIVE; + tmp->str = 0; + dirsep = 1; + } + else { + const char *m = find_dirsep(p, flags); + char *buf = ALLOC_N(char, m-p+1); + memcpy(buf, p, m-p); + buf[m-p] = '\0'; + tmp->type = has_magic(buf, flags) ? MAGICAL : PLAIN; + tmp->str = buf; + if (*m) { + dirsep = 1; + p = m + 1; + } + else { + dirsep = 0; + p = m; + } + } + *tail = tmp; + tail = &tmp->next; + } + + tmp = ALLOC(struct glob_pattern); + tmp->type = dirsep ? 
MATCH_DIR : MATCH_ALL; + tmp->str = 0; + *tail = tmp; + tmp->next = 0; + + return list; +} + +static void +glob_free_pattern(list) + struct glob_pattern *list; +{ + while (list) { + struct glob_pattern *tmp = list; + list = list->next; + if (tmp->str) + free(tmp->str); + free(tmp); + } +} + +static VALUE +join_path(path, dirsep, name) + VALUE path; + int dirsep; + const char *name; +{ + long len = RSTRING(path)->len; + VALUE buf = rb_str_new(0, RSTRING(path)->len+strlen(name)+(dirsep?1:0)); + + memcpy(RSTRING(buf)->ptr, RSTRING(path)->ptr, len); + if (dirsep) { + strcpy(RSTRING(buf)->ptr+len, "/"); + len++; + } + strcpy(RSTRING(buf)->ptr+len, name); + return buf; +} + +enum answer { YES, NO, UNKNOWN }; + +#ifndef S_ISDIR +# define S_ISDIR(m) ((m & S_IFMT) == S_IFDIR) +#endif + +#ifndef S_ISLNK +# ifndef S_IFLNK +# define S_ISLNK(m) (0) +# else +# define S_ISLNK(m) ((m & S_IFMT) == S_IFLNK) +# endif +#endif + +struct glob_args { + void (*func) _((VALUE, VALUE)); + VALUE c; + VALUE v; +}; + +static VALUE glob_func_caller _((VALUE)); + +static VALUE +glob_func_caller(val) + VALUE val; +{ + struct glob_args *args = (struct glob_args *)val; + VALUE path = args->c; + + OBJ_TAINT(path); + (*args->func)(path, args->v); + return Qnil; +} + +static int +glob_call_func(func, path, arg) + void (*func) _((VALUE, VALUE)); + VALUE path; + VALUE arg; +{ + int status; + struct glob_args args; + + args.func = func; + args.c = path; + args.v = arg; + + rb_protect(glob_func_caller, (VALUE)&args, &status); + return status; +} + +static int +glob_helper(path, dirsep, exist, isdir, beg, end, flags, func, arg) + VALUE path; + int dirsep; /* '/' should be placed before appending child entry's name to 'path'. */ + enum answer exist; /* Does 'path' indicate an existing entry? */ + enum answer isdir; /* Does 'path' indicate a directory or a symlink to a directory? 
*/ + struct glob_pattern **beg; + struct glob_pattern **end; + int flags; + void (*func) _((VALUE, VALUE)); + VALUE arg; +{ + struct stat st; + int status = 0; + struct glob_pattern **cur, **new_beg, **new_end; + int plain = 0, magical = 0, recursive = 0, match_all = 0, match_dir = 0; + int escape = !(flags & FNM_NOESCAPE); + + for (cur = beg; cur < end; ++cur) { + struct glob_pattern *p = *cur; + if (p->type == RECURSIVE) { + recursive = 1; + p = p->next; + } + switch (p->type) { + case PLAIN: + plain = 1; + break; + case MAGICAL: + magical = 1; + break; + case MATCH_ALL: + match_all = 1; + break; + case MATCH_DIR: + match_dir = 1; + break; + } + } + + if (RSTRING(path)->len > 0) { + if (match_all && exist == UNKNOWN) { + if (do_lstat(RSTRING(path)->ptr, &st) == 0) { + exist = YES; + isdir = S_ISDIR(st.st_mode) ? YES : S_ISLNK(st.st_mode) ? UNKNOWN : NO; + } + else { + exist = NO; + isdir = NO; + } + } + + if (match_dir && isdir == UNKNOWN) { + if (do_stat(RSTRING(path)->ptr, &st) == 0) { + exist = YES; + isdir = S_ISDIR(st.st_mode) ? YES : NO; + } + else { + exist = NO; + isdir = NO; + } + } + + if (match_all && exist == YES) { + status = glob_call_func(func, path, arg); + if (status) return status; + } + + if (match_dir && isdir == YES) { + status = glob_call_func(func, join_path(path, dirsep, ""), arg); + if (status) return status; + } + } + + if (exist == NO || isdir == NO) return 0; + + if (magical || recursive) { + struct dirent *dp; + DIR *dirp = do_opendir(RSTRING(path)->len > 0 ? RSTRING(path)->ptr : "."); + if (dirp == NULL) return 0; + + for (dp = readdir(dirp); dp != NULL; dp = readdir(dirp)) { + VALUE buf = join_path(path, dirsep, dp->d_name); + + enum answer new_isdir = UNKNOWN; + if (recursive && strcmp(dp->d_name, ".") != 0 && strcmp(dp->d_name, "..") != 0 + && fnmatch("*", dp->d_name, flags) == 0) { +#ifndef _WIN32 + if (do_lstat(RSTRING(buf)->ptr, &st) == 0) + new_isdir = S_ISDIR(st.st_mode) ? YES : S_ISLNK(st.st_mode) ? 
UNKNOWN : NO; + else + new_isdir = NO; +#else + new_isdir = dp->d_isdir ? (!dp->d_isrep ? YES : UNKNOWN) : NO; +#endif + } + + new_beg = new_end = ALLOC_N(struct glob_pattern *, (end - beg) * 2); + + for (cur = beg; cur < end; ++cur) { + struct glob_pattern *p = *cur; + if (p->type == RECURSIVE) { + if (new_isdir == YES) /* not symlink but real directory */ + *new_end++ = p; /* append recursive pattern */ + p = p->next; /* 0 times recursion */ + } + if (p->type == PLAIN || p->type == MAGICAL) { + if (fnmatch(p->str, dp->d_name, flags) == 0) + *new_end++ = p->next; + } + } + + status = glob_helper(buf, 1, YES, new_isdir, new_beg, new_end, flags, func, arg); + free(new_beg); + if (status) break; + } + + closedir(dirp); + } + else if (plain) { + struct glob_pattern **copy_beg, **copy_end, **cur2; + + copy_beg = copy_end = ALLOC_N(struct glob_pattern *, end - beg); + for (cur = beg; cur < end; ++cur) + *copy_end++ = (*cur)->type == PLAIN ? *cur : 0; + + for (cur = copy_beg; cur < copy_end; ++cur) { + if (*cur) { + VALUE buf; + char *name; + name = ALLOC_N(char, strlen((*cur)->str) + 1); + strcpy(name, (*cur)->str); + if (escape) remove_backslashes(name); + + new_beg = new_end = ALLOC_N(struct glob_pattern *, end - beg); + *new_end++ = (*cur)->next; + for (cur2 = cur + 1; cur2 < copy_end; ++cur2) { + if (*cur2 && fnmatch((*cur2)->str, name, flags) == 0) { + *new_end++ = (*cur2)->next; + *cur2 = 0; + } + } + + buf = join_path(path, dirsep, name); + free(name); + status = glob_helper(buf, 1, UNKNOWN, UNKNOWN, new_beg, new_end, flags, func, arg); + free(new_beg); + if (status) break; + } + } + + free(copy_beg); + } + + return status; +} + +static int +rb_glob2(path, offset, flags, func, arg) + VALUE path; + long offset; + int flags; + void (*func) _((VALUE, VALUE)); + VALUE arg; +{ + struct glob_pattern *list; + const char *root, *start; + VALUE buf; + int n; + int status; + + if (flags & FNM_CASEFOLD) { + rb_warn("Dir.glob() ignores File::FNM_CASEFOLD"); + } + + start = 
root = StringValuePtr(path) + offset; +#if defined DOSISH + flags |= FNM_CASEFOLD; + root = rb_path_skip_prefix(root); +#else + flags &= ~FNM_CASEFOLD; +#endif + + if (root && *root == '/') root++; + + n = root - start; + buf = rb_str_new(start, n); + + list = glob_make_pattern(root, flags); + status = glob_helper(buf, 0, UNKNOWN, UNKNOWN, &list, &list + 1, flags, func, arg); + glob_free_pattern(list); + + return status; +} + +struct rb_glob_args { + void (*func) _((const char*, VALUE)); + VALUE arg; +}; + +static VALUE +rb_glob_caller(path, a) + VALUE path, a; +{ + struct rb_glob_args *args = (struct rb_glob_args *)a; + (*args->func)(RSTRING(path)->ptr, args->arg); + return Qnil; +} + +void +rb_glob(path, func, arg) + const char *path; + void (*func) _((const char*, VALUE)); + VALUE arg; +{ + struct rb_glob_args args; + int status; + + args.func = func; + args.arg = arg; + status = rb_glob2(rb_str_new2(path), 0, 0, rb_glob_caller, &args); + + if (status) rb_jump_tag(status); +} + +static void +push_pattern(path, ary) + VALUE path, ary; +{ + rb_ary_push(ary, path); +} + +static int +push_glob(VALUE ary, VALUE s, long offset, int flags); + +static int +push_glob(ary, str, offset, flags) + VALUE ary; + VALUE str; + long offset; + int flags; +{ + const int escape = !(flags & FNM_NOESCAPE); + + const char *p = RSTRING(str)->ptr + offset; + const char *s = p; + const char *lbrace = 0, *rbrace = 0; + int nest = 0, status = 0; + + while (*p) { + if (*p == '{' && nest++ == 0) { + lbrace = p; + } + if (*p == '}' && --nest <= 0) { + rbrace = p; + break; + } + if (*p == '\\' && escape) { + if (!*++p) break; + } + Inc(p); + } + + if (lbrace && rbrace) { + VALUE buffer = rb_str_new(0, strlen(s)); + char *buf; + long shift; + + buf = RSTRING(buffer)->ptr; + memcpy(buf, s, lbrace-s); + shift = (lbrace-s); + p = lbrace; + while (p < rbrace) { + const char *t = ++p; + nest = 0; + while (p < rbrace && !(*p == ',' && nest == 0)) { + if (*p == '{') nest++; + if (*p == '}') nest--; + 
if (*p == '\\' && escape) { + if (++p == rbrace) break; + } + Inc(p); + } + memcpy(buf+shift, t, p-t); + strcpy(buf+shift+(p-t), rbrace+1); + status = push_glob(ary, buffer, offset, flags); + if (status) break; + } + } + else if (!lbrace && !rbrace) { + status = rb_glob2(str, offset, flags, push_pattern, ary); + } + + return status; +} + +static VALUE +rb_push_glob(str, flags) /* '\0' is delimiter */ + VALUE str; + int flags; +{ + long offset = 0; + VALUE ary; + + FilePathValue(str); + + ary = rb_ary_new(); + + while (offset < RSTRING(str)->len) { + int status = push_glob(ary, str, offset, flags); + char *p, *pend; + if (status) rb_jump_tag(status); + p = RSTRING(str)->ptr + offset; + p += strlen(p) + 1; + pend = RSTRING(str)->ptr + RSTRING(str)->len; + while (p < pend && !*p) + p++; + offset = p - RSTRING(str)->ptr; + } + + if (rb_block_given_p()) { + rb_ary_each(ary); + return Qnil; + } + return ary; +} + +/* + * call-seq: + * Dir[ string ] => array + * + * Equivalent to calling + * <em>dir</em>.<code>glob(</code><i>string,</i><code>0)</code>. + * + */ +static VALUE +dir_s_aref(obj, str) + VALUE obj, str; +{ + return rb_push_glob(str, 0); +} + +/* + * call-seq: + * Dir.glob( string, [flags] ) => array + * Dir.glob( string, [flags] ) {| filename | block } => nil + * + * Returns the filenames found by expanding the pattern given in + * <i>string</i>, either as an <i>array</i> or as parameters to the + * block. Note that this pattern is not a regexp (it's closer to a + * shell glob). See <code>File::fnmatch</code> for the meaning of + * the <i>flags</i> parameter. Note that case sensitivity + * depends on your system (so <code>File::FNM_CASEFOLD</code> is ignored) + * + * <code>*</code>:: Matches any file. Can be restricted by + * other values in the glob. 
<code>*</code> + * will match all files; <code>c*</code> will + * match all files beginning with + * <code>c</code>; <code>*c</code> will match + * all files ending with <code>c</code>; and + * <code>*c*</code> will match all files that + * have <code>c</code> in them (including at + * the beginning or end). Equivalent to + * <code>/ .* /x</code> in regexp. + * <code>**</code>:: Matches directories recursively. + * <code>?</code>:: Matches any one character. Equivalent to + * <code>/.{1}/</code> in regexp. + * <code>[set]</code>:: Matches any one character in +set+. + * Behaves exactly like character sets in + * Regexp, including set negation + * (<code>[^a-z]</code>). + * <code>{p,q}</code>:: Matches either literal <code>p</code> or + * literal <code>q</code>. Matching literals + * may be more than one character in length. + * More than two literals may be specified. + * Equivalent to pattern alternation in + * regexp. + * <code>\</code>:: Escapes the next metacharacter. + * + * Dir["config.?"] #=> ["config.h"] + * Dir.glob("config.?") #=> ["config.h"] + * Dir.glob("*.[a-z][a-z]") #=> ["main.rb"] + * Dir.glob("*.[^r]*") #=> ["config.h"] + * Dir.glob("*.{rb,h}") #=> ["main.rb", "config.h"] + * Dir.glob("*") #=> ["config.h", "main.rb"] + * Dir.glob("*", File::FNM_DOTMATCH) #=> [".", "..", "config.h", "main.rb"] + * + * rbfiles = File.join("**", "*.rb") + * Dir.glob(rbfiles) #=> ["main.rb", + * "lib/song.rb", + * "lib/song/karaoke.rb"] + * libdirs = File.join("**", "lib") + * Dir.glob(libdirs) #=> ["lib"] + * + * librbfiles = File.join("**", "lib", "**", "*.rb") + * Dir.glob(librbfiles) #=> ["lib/song.rb", + * "lib/song/karaoke.rb"] + * + * librbfiles = File.join("**", "lib", "*.rb") + * Dir.glob(librbfiles) #=> ["lib/song.rb"] + */ +static VALUE +dir_s_glob(argc, argv, obj) + int argc; + VALUE *argv; + VALUE obj; +{ + VALUE str, rflags; + int flags; + + if (rb_scan_args(argc, argv, "11", &str, &rflags) == 2) + flags = NUM2INT(rflags); + else + flags = 0; + + return 
rb_push_glob(str, flags); +} + +static VALUE +dir_open_dir(path) + VALUE path; +{ + struct dir_data *dp; + VALUE dir = rb_funcall(rb_cDir, rb_intern("open"), 1, path); + + if (TYPE(dir) != T_DATA || + RDATA(dir)->dfree != (RUBY_DATA_FUNC)free_dir) { + rb_raise(rb_eTypeError, "wrong argument type %s (expected Dir)", + rb_obj_classname(dir)); + } + return dir; +} + + +/* + * call-seq: + * Dir.foreach( dirname ) {| filename | block } => nil + * + * Calls the block once for each entry in the named directory, passing + * the filename of each entry as a parameter to the block. + * + * Dir.foreach("testdir") {|x| puts "Got #{x}" } + * + * <em>produces:</em> + * + * Got . + * Got .. + * Got config.h + * Got main.rb + * + */ +static VALUE +dir_foreach(io, dirname) + VALUE io, dirname; +{ + VALUE dir; + + dir = dir_open_dir(dirname); + rb_ensure(dir_each, dir, dir_close, dir); + return Qnil; +} + +/* + * call-seq: + * Dir.entries( dirname ) => array + * + * Returns an array containing all of the filenames in the given + * directory. Will raise a <code>SystemCallError</code> if the named + * directory doesn't exist. + * + * Dir.entries("testdir") #=> [".", "..", "config.h", "main.rb"] + * + */ +static VALUE +dir_entries(io, dirname) + VALUE io, dirname; +{ + VALUE dir; + + dir = dir_open_dir(dirname); + return rb_ensure(rb_Array, dir, dir_close, dir); +} + +/* + * call-seq: + * File.fnmatch( pattern, path, [flags] ) => (true or false) + * File.fnmatch?( pattern, path, [flags] ) => (true or false) + * + * Returns true if <i>path</i> matches against <i>pattern</i> The + * pattern is not a regular expression; instead it follows rules + * similar to shell filename globbing. It may contain the following + * metacharacters: + * + * <code>*</code>:: Matches any file. Can be restricted by + * other values in the glob. 
<code>*</code> + * will match all files; <code>c*</code> will + * match all files beginning with + * <code>c</code>; <code>*c</code> will match + * all files ending with <code>c</code>; and + * <code>*c*</code> will match all files that + * have <code>c</code> in them (including at + * the beginning or end). Equivalent to + * <code>/ .* /x</code> in regexp. + * <code>**</code>:: Matches directories recursively or files + * expansively. + * <code>?</code>:: Matches any one character. Equivalent to + * <code>/.{1}/</code> in regexp. + * <code>[set]</code>:: Matches any one character in +set+. + * Behaves exactly like character sets in + * Regexp, including set negation + * (<code>[^a-z]</code>). + * <code>\</code>:: Escapes the next metacharacter. + * + * <i>flags</i> is a bitwise OR of the <code>FNM_xxx</code> + * parameters. The same glob pattern and flags are used by + * <code>Dir::glob</code>. + * + * File.fnmatch('cat', 'cat') #=> true : match entire string + * File.fnmatch('cat', 'category') #=> false : only match partial string + * File.fnmatch('c{at,ub}s', 'cats') #=> false : { } isn't supported + * + * File.fnmatch('c?t', 'cat') #=> true : '?' 
match only 1 character + * File.fnmatch('c??t', 'cat') #=> false : ditto + * File.fnmatch('c*', 'cats') #=> true : '*' match 0 or more characters + * File.fnmatch('c*t', 'c/a/b/t') #=> true : ditto + * File.fnmatch('ca[a-z]', 'cat') #=> true : inclusive bracket expression + * File.fnmatch('ca[^t]', 'cat') #=> false : exclusive bracket expression ('^' or '!') + * + * File.fnmatch('cat', 'CAT') #=> false : case sensitive + * File.fnmatch('cat', 'CAT', File::FNM_CASEFOLD) #=> true : case insensitive + * + * File.fnmatch('?', '/', File::FNM_PATHNAME) #=> false : wildcard doesn't match '/' on FNM_PATHNAME + * File.fnmatch('*', '/', File::FNM_PATHNAME) #=> false : ditto + * File.fnmatch('[/]', '/', File::FNM_PATHNAME) #=> false : ditto + * + * File.fnmatch('\?', '?') #=> true : escaped wildcard becomes ordinary + * File.fnmatch('\a', 'a') #=> true : escaped ordinary remains ordinary + * File.fnmatch('\a', '\a', File::FNM_NOESCAPE) #=> true : FNM_NOESACPE makes '\' ordinary + * File.fnmatch('[\?]', '?') #=> true : can escape inside bracket expression + * + * File.fnmatch('*', '.profile') #=> false : wildcard doesn't match leading + * File.fnmatch('*', '.profile', File::FNM_DOTMATCH) #=> true period by default. 
+ * File.fnmatch('.*', '.profile') #=> true + * + * rbfiles = File.join("**", "*.rb") + * File.fnmatch(rbfiles, 'main.rb') #=> false + * File.fnmatch(rbfiles, './main.rb') #=> false + * File.fnmatch(rbfiles, 'lib/song.rb') #=> true + * File.fnmatch('**.rb', 'main.rb') #=> true + * File.fnmatch('**.rb', './main.rb') #=> false + * File.fnmatch('**.rb', 'lib/song.rb') #=> true + * File.fnmatch('*', 'dave/.profile') #=> true + * + * File.fnmatch('* IGNORE /*', 'dave/.profile', File::FNM_PATHNAME) #=> false + * File.fnmatch('* IGNORE /*', 'dave/.profile', File::FNM_PATHNAME | File::FNM_DOTMATCH) #=> true + * + * File.fnmatch('** IGNORE /foo', 'a/b/c/foo', File::FNM_PATHNAME) #=> true + * File.fnmatch('** IGNORE /foo', '/a/b/c/foo', File::FNM_PATHNAME) #=> true + * File.fnmatch('** IGNORE /foo', 'c:/a/b/c/foo', File::FNM_PATHNAME) #=> true + * File.fnmatch('** IGNORE /foo', 'a/.b/c/foo', File::FNM_PATHNAME) #=> false + * File.fnmatch('** IGNORE /foo', 'a/.b/c/foo', File::FNM_PATHNAME | File::FNM_DOTMATCH) #=> true + */ +static VALUE +file_s_fnmatch(argc, argv, obj) + int argc; + VALUE *argv; + VALUE obj; +{ + VALUE pattern, path; + VALUE rflags; + int flags; + + if (rb_scan_args(argc, argv, "21", &pattern, &path, &rflags) == 3) + flags = NUM2INT(rflags); + else + flags = 0; + + StringValue(pattern); + StringValue(path); + + if (fnmatch(RSTRING(pattern)->ptr, RSTRING(path)->ptr, flags) == 0) + return Qtrue; + + return Qfalse; +} + +/* + * Objects of class <code>Dir</code> are directory streams representing + * directories in the underlying file system. They provide a variety of + * ways to list directories and their contents. See also + * <code>File</code>. + * + * The directory used in these examples contains the two regular files + * (<code>config.h</code> and <code>main.rb</code>), the parent + * directory (<code>..</code>), and the directory itself + * (<code>.</code>). 
+ */ +void +Init_Dir() +{ + rb_cDir = rb_define_class("Dir", rb_cObject); + + rb_include_module(rb_cDir, rb_mEnumerable); + + rb_define_alloc_func(rb_cDir, dir_s_alloc); + rb_define_singleton_method(rb_cDir, "open", dir_s_open, 1); + rb_define_singleton_method(rb_cDir, "foreach", dir_foreach, 1); + rb_define_singleton_method(rb_cDir, "entries", dir_entries, 1); + + rb_define_method(rb_cDir,"initialize", dir_initialize, 1); + rb_define_method(rb_cDir,"path", dir_path, 0); + rb_define_method(rb_cDir,"inspect", dir_inspect, 0); + rb_define_method(rb_cDir,"read", dir_read, 0); + rb_define_method(rb_cDir,"each", dir_each, 0); + rb_define_method(rb_cDir,"rewind", dir_rewind, 0); + rb_define_method(rb_cDir,"tell", dir_tell, 0); + rb_define_method(rb_cDir,"seek", dir_seek, 1); + rb_define_method(rb_cDir,"pos", dir_tell, 0); + rb_define_method(rb_cDir,"pos=", dir_set_pos, 1); + rb_define_method(rb_cDir,"close", dir_close, 0); + + rb_define_singleton_method(rb_cDir,"chdir", dir_s_chdir, -1); + rb_define_singleton_method(rb_cDir,"getwd", dir_s_getwd, 0); + rb_define_singleton_method(rb_cDir,"pwd", dir_s_getwd, 0); + rb_define_singleton_method(rb_cDir,"chroot", dir_s_chroot, 1); + rb_define_singleton_method(rb_cDir,"mkdir", dir_s_mkdir, -1); + rb_define_singleton_method(rb_cDir,"rmdir", dir_s_rmdir, 1); + rb_define_singleton_method(rb_cDir,"delete", dir_s_rmdir, 1); + rb_define_singleton_method(rb_cDir,"unlink", dir_s_rmdir, 1); + + rb_define_singleton_method(rb_cDir,"glob", dir_s_glob, -1); + rb_define_singleton_method(rb_cDir,"[]", dir_s_aref, 1); + + rb_define_singleton_method(rb_cFile,"fnmatch", file_s_fnmatch, -1); + rb_define_singleton_method(rb_cFile,"fnmatch?", file_s_fnmatch, -1); + + rb_file_const("FNM_NOESCAPE", INT2FIX(FNM_NOESCAPE)); + rb_file_const("FNM_PATHNAME", INT2FIX(FNM_PATHNAME)); + rb_file_const("FNM_DOTMATCH", INT2FIX(FNM_DOTMATCH)); + rb_file_const("FNM_CASEFOLD", INT2FIX(FNM_CASEFOLD)); +} 
+/********************************************************************** + + dln.c - + + $Author: nobu $ + $Date: 2005/04/20 14:22:57 $ + created at: Tue Jan 18 17:05:06 JST 1994 + + Copyright (C) 1993-2003 Yukihiro Matsumoto + +**********************************************************************/ + +#include "ruby.h" +#include "dln.h" + +#ifdef HAVE_STDLIB_H +# include <stdlib.h> +#endif + +#ifdef __CHECKER__ +#undef HAVE_DLOPEN +#undef USE_DLN_A_OUT +#undef USE_DLN_DLOPEN +#endif + +#ifdef USE_DLN_A_OUT +char *dln_argv0; +#endif + +#ifdef _AIX +#pragma alloca +#endif + +#if defined(HAVE_ALLOCA_H) +#include <alloca.h> +#endif + +#ifdef HAVE_STRING_H +# include <string.h> +#else +# include <strings.h> +#endif + +#ifndef xmalloc +void *xmalloc(); +void *xcalloc(); +void *xrealloc(); +#endif + +#include <stdio.h> +#if defined(_WIN32) || defined(__VMS) +#include "missing/file.h" +#endif +#include <sys/types.h> +#include <sys/stat.h> + +#ifndef S_ISDIR +# define S_ISDIR(m) ((m & S_IFMT) == S_IFDIR) +#endif + +#ifdef HAVE_SYS_PARAM_H +# include <sys/param.h> +#endif +#ifndef MAXPATHLEN +# define MAXPATHLEN 1024 +#endif + +#ifdef HAVE_UNISTD_H +# include <unistd.h> +#endif + +#ifndef _WIN32 +char *getenv(); +#endif + +#if defined(__VMS) +#pragma builtins +#include <dlfcn.h> +#endif + +#ifdef __MACOS__ +# include <TextUtils.h> +# include <CodeFragments.h> +# include <Aliases.h> +# include "macruby_private.h" +#endif + +#ifdef __BEOS__ +# include <image.h> +#endif + +int eaccess(); + +#ifndef NO_DLN_LOAD + +#if defined(HAVE_DLOPEN) && !defined(USE_DLN_A_OUT) && !defined(_AIX) && !defined(__APPLE__) && !defined(_UNICOSMP) +/* dynamic load with dlopen() */ +# define USE_DLN_DLOPEN +#endif + +#ifndef FUNCNAME_PATTERN +# if defined(__hp9000s300) || (defined(__NetBSD__) && !defined(__ELF__)) || defined(__BORLANDC__) || (defined(__FreeBSD__) && !defined(__ELF__)) || (defined(__OpenBSD__) && !defined(__ELF__)) || defined(NeXT) || defined(__WATCOMC__) || defined(__APPLE__) +# 
define FUNCNAME_PATTERN "_Init_%s" +# else +# define FUNCNAME_PATTERN "Init_%s" +# endif +#endif + +static int +init_funcname_len(buf, file) + char **buf; + const char *file; +{ + char *p; + const char *slash; + int len; + + /* Load the file as an object one */ + for (slash = file-1; *file; file++) /* Find position of last '/' */ +#ifdef __MACOS__ + if (*file == ':') slash = file; +#else + if (*file == '/') slash = file; +#endif + + len = strlen(FUNCNAME_PATTERN) + strlen(slash + 1); + *buf = xmalloc(len); + snprintf(*buf, len, FUNCNAME_PATTERN, slash + 1); + for (p = *buf; *p; p++) { /* Delete suffix if it exists */ + if (*p == '.') { + *p = '\0'; break; + } + } + return p - *buf; +} + +#define init_funcname(buf, file) do {\ + int len = init_funcname_len(buf, file);\ + char *tmp = ALLOCA_N(char, len+1);\ + if (!tmp) {\ + free(*buf);\ + rb_memerror();\ + }\ + strcpy(tmp, *buf);\ + free(*buf);\ + *buf = tmp;\ +} while (0) + +#ifdef USE_DLN_A_OUT + +#ifndef LIBC_NAME +# define LIBC_NAME "libc.a" +#endif + +#ifndef DLN_DEFAULT_LIB_PATH +# define DLN_DEFAULT_LIB_PATH "/lib:/usr/lib:/usr/local/lib:." 
+#endif + +#include <errno.h> + +static int dln_errno; + +#define DLN_ENOEXEC ENOEXEC /* Exec format error */ +#define DLN_ECONFL 1201 /* Symbol name conflict */ +#define DLN_ENOINIT 1202 /* No initializer given */ +#define DLN_EUNDEF 1203 /* Undefine symbol remains */ +#define DLN_ENOTLIB 1204 /* Not a library file */ +#define DLN_EBADLIB 1205 /* Malformed library file */ +#define DLN_EINIT 1206 /* Not initialized */ + +static int dln_init_p = 0; + +#include <ar.h> +#include <a.out.h> +#ifndef N_COMM +# define N_COMM 0x12 +#endif +#ifndef N_MAGIC +# define N_MAGIC(x) (x).a_magic +#endif + +#define INVALID_OBJECT(h) (N_MAGIC(h) != OMAGIC) + +#include "util.h" +#include "st.h" + +static st_table *sym_tbl; +static st_table *undef_tbl; + +static int load_lib(); + +static int +load_header(fd, hdrp, disp) + int fd; + struct exec *hdrp; + long disp; +{ + int size; + + lseek(fd, disp, 0); + size = read(fd, hdrp, sizeof(struct exec)); + if (size == -1) { + dln_errno = errno; + return -1; + } + if (size != sizeof(struct exec) || N_BADMAG(*hdrp)) { + dln_errno = DLN_ENOEXEC; + return -1; + } + return 0; +} + +#if defined(sequent) +#define RELOC_SYMBOL(r) ((r)->r_symbolnum) +#define RELOC_MEMORY_SUB_P(r) ((r)->r_bsr) +#define RELOC_PCREL_P(r) ((r)->r_pcrel || (r)->r_bsr) +#define RELOC_TARGET_SIZE(r) ((r)->r_length) +#endif + +/* Default macros */ +#ifndef RELOC_ADDRESS +#define RELOC_ADDRESS(r) ((r)->r_address) +#define RELOC_EXTERN_P(r) ((r)->r_extern) +#define RELOC_SYMBOL(r) ((r)->r_symbolnum) +#define RELOC_MEMORY_SUB_P(r) 0 +#define RELOC_PCREL_P(r) ((r)->r_pcrel) +#define RELOC_TARGET_SIZE(r) ((r)->r_length) +#endif + +#if defined(sun) && defined(sparc) +/* Sparc (Sun 4) macros */ +# undef relocation_info +# define relocation_info reloc_info_sparc +# define R_RIGHTSHIFT(r) (reloc_r_rightshift[(r)->r_type]) +# define R_BITSIZE(r) (reloc_r_bitsize[(r)->r_type]) +# define R_LENGTH(r) (reloc_r_length[(r)->r_type]) +static int reloc_r_rightshift[] = { + 0, 0, 0, 0, 0, 0, 2, 
2, 10, 0, 0, 0, 0, 0, 0, +}; +static int reloc_r_bitsize[] = { + 8, 16, 32, 8, 16, 32, 30, 22, 22, 22, 13, 10, 32, 32, 16, +}; +static int reloc_r_length[] = { + 0, 1, 2, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, +}; +# define R_PCREL(r) \ + ((r)->r_type >= RELOC_DISP8 && (r)->r_type <= RELOC_WDISP22) +# define R_SYMBOL(r) ((r)->r_index) +#endif + +#if defined(sequent) +#define R_SYMBOL(r) ((r)->r_symbolnum) +#define R_MEMORY_SUB(r) ((r)->r_bsr) +#define R_PCREL(r) ((r)->r_pcrel || (r)->r_bsr) +#define R_LENGTH(r) ((r)->r_length) +#endif + +#ifndef R_SYMBOL +# define R_SYMBOL(r) ((r)->r_symbolnum) +# define R_MEMORY_SUB(r) 0 +# define R_PCREL(r) ((r)->r_pcrel) +# define R_LENGTH(r) ((r)->r_length) +#endif + +static struct relocation_info * +load_reloc(fd, hdrp, disp) + int fd; + struct exec *hdrp; + long disp; +{ + struct relocation_info *reloc; + int size; + + lseek(fd, disp + N_TXTOFF(*hdrp) + hdrp->a_text + hdrp->a_data, 0); + size = hdrp->a_trsize + hdrp->a_drsize; + reloc = (struct relocation_info*)xmalloc(size); + if (reloc == NULL) { + dln_errno = errno; + return NULL; + } + + if (read(fd, reloc, size) != size) { + dln_errno = errno; + free(reloc); + return NULL; + } + + return reloc; +} + +static struct nlist * +load_sym(fd, hdrp, disp) + int fd; + struct exec *hdrp; + long disp; +{ + struct nlist * buffer; + struct nlist * sym; + struct nlist * end; + long displ; + int size; + + lseek(fd, N_SYMOFF(*hdrp) + hdrp->a_syms + disp, 0); + if (read(fd, &size, sizeof(int)) != sizeof(int)) { + goto err_noexec; + } + + buffer = (struct nlist*)xmalloc(hdrp->a_syms + size); + if (buffer == NULL) { + dln_errno = errno; + return NULL; + } + + lseek(fd, disp + N_SYMOFF(*hdrp), 0); + if (read(fd, buffer, hdrp->a_syms + size) != hdrp->a_syms + size) { + free(buffer); + goto err_noexec; + } + + sym = buffer; + end = sym + hdrp->a_syms / sizeof(struct nlist); + displ = (long)buffer + (long)(hdrp->a_syms); + + while (sym < end) { + sym->n_un.n_name = (char*)sym->n_un.n_strx + 
displ; + sym++; + } + return buffer; + + err_noexec: + dln_errno = DLN_ENOEXEC; + return NULL; +} + +static st_table * +sym_hash(hdrp, syms) + struct exec *hdrp; + struct nlist *syms; +{ + st_table *tbl; + struct nlist *sym = syms; + struct nlist *end = syms + (hdrp->a_syms / sizeof(struct nlist)); + + tbl = st_init_strtable(); + if (tbl == NULL) { + dln_errno = errno; + return NULL; + } + + while (sym < end) { + st_insert(tbl, sym->n_un.n_name, sym); + sym++; + } + return tbl; +} + +static int +dln_init(prog) + const char *prog; +{ + char *file; + int fd; + struct exec hdr; + struct nlist *syms; + + if (dln_init_p == 1) return 0; + + file = dln_find_exe(prog, NULL); + if (file == NULL || (fd = open(file, O_RDONLY)) < 0) { + dln_errno = errno; + return -1; + } + + if (load_header(fd, &hdr, 0) == -1) return -1; + syms = load_sym(fd, &hdr, 0); + if (syms == NULL) { + close(fd); + return -1; + } + sym_tbl = sym_hash(&hdr, syms); + if (sym_tbl == NULL) { /* file may be start with #! */ + char c = '\0'; + char buf[MAXPATHLEN]; + char *p; + + free(syms); + lseek(fd, 0L, 0); + if (read(fd, &c, 1) == -1) { + dln_errno = errno; + return -1; + } + if (c != '#') goto err_noexec; + if (read(fd, &c, 1) == -1) { + dln_errno = errno; + return -1; + } + if (c != '!') goto err_noexec; + + p = buf; + /* skip forwarding spaces */ + while (read(fd, &c, 1) == 1) { + if (c == '\n') goto err_noexec; + if (c != '\t' && c != ' ') { + *p++ = c; + break; + } + } + /* read in command name */ + while (read(fd, p, 1) == 1) { + if (*p == '\n' || *p == '\t' || *p == ' ') break; + p++; + if (p-buf >= MAXPATHLEN) { + dln_errno = ENAMETOOLONG; + return -1; + } + } + *p = '\0'; + + return dln_init(buf); + } + dln_init_p = 1; + undef_tbl = st_init_strtable(); + close(fd); + return 0; + + err_noexec: + close(fd); + dln_errno = DLN_ENOEXEC; + return -1; +} + +static long +load_text_data(fd, hdrp, bss, disp) + int fd; + struct exec *hdrp; + int bss; + long disp; +{ + int size; + unsigned char* addr; + + 
lseek(fd, disp + N_TXTOFF(*hdrp), 0); + size = hdrp->a_text + hdrp->a_data; + + if (bss == -1) size += hdrp->a_bss; + else if (bss > 1) size += bss; + + addr = (unsigned char*)xmalloc(size); + if (addr == NULL) { + dln_errno = errno; + return 0; + } + + if (read(fd, addr, size) != size) { + dln_errno = errno; + free(addr); + return 0; + } + + if (bss == -1) { + memset(addr + hdrp->a_text + hdrp->a_data, 0, hdrp->a_bss); + } + else if (bss > 0) { + memset(addr + hdrp->a_text + hdrp->a_data, 0, bss); + } + + return (long)addr; +} + +static int +undef_print(key, value) + char *key, *value; +{ + fprintf(stderr, " %s\n", key); + return ST_CONTINUE; +} + +static void +dln_print_undef() +{ + fprintf(stderr, " Undefined symbols:\n"); + st_foreach(undef_tbl, undef_print, NULL); +} + +static void +dln_undefined() +{ + if (undef_tbl->num_entries > 0) { + fprintf(stderr, "dln: Calling undefined function\n"); + dln_print_undef(); + rb_exit(1); + } +} + +struct undef { + char *name; + struct relocation_info reloc; + long base; + char *addr; + union { + char c; + short s; + long l; + } u; +}; + +static st_table *reloc_tbl = NULL; +static void +link_undef(name, base, reloc) + const char *name; + long base; + struct relocation_info *reloc; +{ + static int u_no = 0; + struct undef *obj; + char *addr = (char*)(reloc->r_address + base); + + obj = (struct undef*)xmalloc(sizeof(struct undef)); + obj->name = strdup(name); + obj->reloc = *reloc; + obj->base = base; + switch (R_LENGTH(reloc)) { + case 0: /* byte */ + obj->u.c = *addr; + break; + case 1: /* word */ + obj->u.s = *(short*)addr; + break; + case 2: /* long */ + obj->u.l = *(long*)addr; + break; + } + if (reloc_tbl == NULL) { + reloc_tbl = st_init_numtable(); + } + st_insert(reloc_tbl, u_no++, obj); +} + +struct reloc_arg { + const char *name; + long value; +}; + +static int +reloc_undef(no, undef, arg) + int no; + struct undef *undef; + struct reloc_arg *arg; +{ + int datum; + char *address; +#if defined(sun) && defined(sparc) 
+ unsigned int mask = 0; +#endif + + if (strcmp(arg->name, undef->name) != 0) return ST_CONTINUE; + address = (char*)(undef->base + undef->reloc.r_address); + datum = arg->value; + + if (R_PCREL(&(undef->reloc))) datum -= undef->base; +#if defined(sun) && defined(sparc) + datum += undef->reloc.r_addend; + datum >>= R_RIGHTSHIFT(&(undef->reloc)); + mask = (1 << R_BITSIZE(&(undef->reloc))) - 1; + mask |= mask -1; + datum &= mask; + switch (R_LENGTH(&(undef->reloc))) { + case 0: + *address = undef->u.c; + *address &= ~mask; + *address |= datum; + break; + case 1: + *(short *)address = undef->u.s; + *(short *)address &= ~mask; + *(short *)address |= datum; + break; + case 2: + *(long *)address = undef->u.l; + *(long *)address &= ~mask; + *(long *)address |= datum; + break; + } +#else + switch (R_LENGTH(&(undef->reloc))) { + case 0: /* byte */ + if (R_MEMORY_SUB(&(undef->reloc))) + *address = datum - *address; + else *address = undef->u.c + datum; + break; + case 1: /* word */ + if (R_MEMORY_SUB(&(undef->reloc))) + *(short*)address = datum - *(short*)address; + else *(short*)address = undef->u.s + datum; + break; + case 2: /* long */ + if (R_MEMORY_SUB(&(undef->reloc))) + *(long*)address = datum - *(long*)address; + else *(long*)address = undef->u.l + datum; + break; + } +#endif + free(undef->name); + free(undef); + return ST_DELETE; +} + +static void +unlink_undef(name, value) + const char *name; + long value; +{ + struct reloc_arg arg; + + arg.name = name; + arg.value = value; + st_foreach(reloc_tbl, reloc_undef, &arg); +} + +#ifdef N_INDR +struct indr_data { + char *name0, *name1; +}; + +static int +reloc_repl(no, undef, data) + int no; + struct undef *undef; + struct indr_data *data; +{ + if (strcmp(data->name0, undef->name) == 0) { + free(undef->name); + undef->name = strdup(data->name1); + } + return ST_CONTINUE; +} +#endif + +static int +load_1(fd, disp, need_init) + int fd; + long disp; + const char *need_init; +{ + static char *libc = LIBC_NAME; + struct exec 
hdr; + struct relocation_info *reloc = NULL; + long block = 0; + long new_common = 0; /* Length of new common */ + struct nlist *syms = NULL; + struct nlist *sym; + struct nlist *end; + int init_p = 0; + + if (load_header(fd, &hdr, disp) == -1) return -1; + if (INVALID_OBJECT(hdr)) { + dln_errno = DLN_ENOEXEC; + return -1; + } + reloc = load_reloc(fd, &hdr, disp); + if (reloc == NULL) return -1; + + syms = load_sym(fd, &hdr, disp); + if (syms == NULL) { + free(reloc); + return -1; + } + + sym = syms; + end = syms + (hdr.a_syms / sizeof(struct nlist)); + while (sym < end) { + struct nlist *old_sym; + int value = sym->n_value; + +#ifdef N_INDR + if (sym->n_type == (N_INDR | N_EXT)) { + char *key = sym->n_un.n_name; + + if (st_lookup(sym_tbl, sym[1].n_un.n_name, &old_sym)) { + if (st_delete(undef_tbl, (st_data_t*)&key, NULL)) { + unlink_undef(key, old_sym->n_value); + free(key); + } + } + else { + struct indr_data data; + + data.name0 = sym->n_un.n_name; + data.name1 = sym[1].n_un.n_name; + st_foreach(reloc_tbl, reloc_repl, &data); + + st_insert(undef_tbl, strdup(sym[1].n_un.n_name), NULL); + if (st_delete(undef_tbl, (st_data_t*)&key, NULL)) { + free(key); + } + } + sym += 2; + continue; + } +#endif + if (sym->n_type == (N_UNDF | N_EXT)) { + if (st_lookup(sym_tbl, sym->n_un.n_name, &old_sym) == 0) { + old_sym = NULL; + } + + if (value) { + if (old_sym) { + sym->n_type = N_EXT | N_COMM; + sym->n_value = old_sym->n_value; + } + else { + int rnd = + value >= sizeof(double) ? sizeof(double) - 1 + : value >= sizeof(long) ? 
sizeof(long) - 1 + : sizeof(short) - 1; + + sym->n_type = N_COMM; + new_common += rnd; + new_common &= ~(long)rnd; + sym->n_value = new_common; + new_common += value; + } + } + else { + if (old_sym) { + sym->n_type = N_EXT | N_COMM; + sym->n_value = old_sym->n_value; + } + else { + sym->n_value = (long)dln_undefined; + st_insert(undef_tbl, strdup(sym->n_un.n_name), NULL); + } + } + } + sym++; + } + + block = load_text_data(fd, &hdr, hdr.a_bss + new_common, disp); + if (block == 0) goto err_exit; + + sym = syms; + while (sym < end) { + struct nlist *new_sym; + char *key; + + switch (sym->n_type) { + case N_COMM: + sym->n_value += hdr.a_text + hdr.a_data; + case N_TEXT|N_EXT: + case N_DATA|N_EXT: + + sym->n_value += block; + + if (st_lookup(sym_tbl, sym->n_un.n_name, &new_sym) != 0 + && new_sym->n_value != (long)dln_undefined) { + dln_errno = DLN_ECONFL; + goto err_exit; + } + + key = sym->n_un.n_name; + if (st_delete(undef_tbl, (st_data_t*)&key, NULL) != 0) { + unlink_undef(key, sym->n_value); + free(key); + } + + new_sym = (struct nlist*)xmalloc(sizeof(struct nlist)); + *new_sym = *sym; + new_sym->n_un.n_name = strdup(sym->n_un.n_name); + st_insert(sym_tbl, new_sym->n_un.n_name, new_sym); + break; + + case N_TEXT: + case N_DATA: + sym->n_value += block; + break; + } + sym++; + } + + /* + * First comes the text-relocation + */ + { + struct relocation_info * rel = reloc; + struct relocation_info * rel_beg = reloc + + (hdr.a_trsize/sizeof(struct relocation_info)); + struct relocation_info * rel_end = reloc + + (hdr.a_trsize+hdr.a_drsize)/sizeof(struct relocation_info); + + while (rel < rel_end) { + char *address = (char*)(rel->r_address + block); + long datum = 0; +#if defined(sun) && defined(sparc) + unsigned int mask = 0; +#endif + + if(rel >= rel_beg) + address += hdr.a_text; + + if (rel->r_extern) { /* Look it up in symbol-table */ + sym = &(syms[R_SYMBOL(rel)]); + switch (sym->n_type) { + case N_EXT|N_UNDF: + link_undef(sym->n_un.n_name, block, rel); + case 
N_EXT|N_COMM: + case N_COMM: + datum = sym->n_value; + break; + default: + goto err_exit; + } + } /* end.. look it up */ + else { /* is static */ + switch (R_SYMBOL(rel)) { + case N_TEXT: + case N_DATA: + datum = block; + break; + case N_BSS: + datum = block + new_common; + break; + case N_ABS: + break; + } + } /* end .. is static */ + if (R_PCREL(rel)) datum -= block; + +#if defined(sun) && defined(sparc) + datum += rel->r_addend; + datum >>= R_RIGHTSHIFT(rel); + mask = (1 << R_BITSIZE(rel)) - 1; + mask |= mask -1; + datum &= mask; + + switch (R_LENGTH(rel)) { + case 0: + *address &= ~mask; + *address |= datum; + break; + case 1: + *(short *)address &= ~mask; + *(short *)address |= datum; + break; + case 2: + *(long *)address &= ~mask; + *(long *)address |= datum; + break; + } +#else + switch (R_LENGTH(rel)) { + case 0: /* byte */ + if (datum < -128 || datum > 127) goto err_exit; + *address += datum; + break; + case 1: /* word */ + *(short *)address += datum; + break; + case 2: /* long */ + *(long *)address += datum; + break; + } +#endif + rel++; + } + } + + if (need_init) { + int len; + char **libs_to_be_linked = 0; + char *buf; + + if (undef_tbl->num_entries > 0) { + if (load_lib(libc) == -1) goto err_exit; + } + + init_funcname(&buf, need_init); + len = strlen(buf); + + for (sym = syms; sym<end; sym++) { + char *name = sym->n_un.n_name; + if (name[0] == '_' && sym->n_value >= block) { + if (strcmp(name+1, "dln_libs_to_be_linked") == 0) { + libs_to_be_linked = (char**)sym->n_value; + } + else if (strcmp(name+1, buf) == 0) { + init_p = 1; + ((int (*)())sym->n_value)(); + } + } + } + if (libs_to_be_linked && undef_tbl->num_entries > 0) { + while (*libs_to_be_linked) { + load_lib(*libs_to_be_linked); + libs_to_be_linked++; + } + } + } + free(reloc); + free(syms); + if (need_init) { + if (init_p == 0) { + dln_errno = DLN_ENOINIT; + return -1; + } + if (undef_tbl->num_entries > 0) { + if (load_lib(libc) == -1) goto err_exit; + if (undef_tbl->num_entries > 0) { + 
dln_errno = DLN_EUNDEF; + return -1; + } + } + } + return 0; + + err_exit: + if (syms) free(syms); + if (reloc) free(reloc); + if (block) free((char*)block); + return -1; +} + +static int target_offset; +static int +search_undef(key, value, lib_tbl) + const char *key; + int value; + st_table *lib_tbl; +{ + long offset; + + if (st_lookup(lib_tbl, key, &offset) == 0) return ST_CONTINUE; + target_offset = offset; + return ST_STOP; +} + +struct symdef { + int rb_str_index; + int lib_offset; +}; + +char *dln_librrb_ary_path = DLN_DEFAULT_LIB_PATH; + +static int +load_lib(lib) + const char *lib; +{ + char *path, *file; + char armagic[SARMAG]; + int fd, size; + struct ar_hdr ahdr; + st_table *lib_tbl = NULL; + int *data, nsym; + struct symdef *base; + char *name_base; + + if (dln_init_p == 0) { + dln_errno = DLN_ENOINIT; + return -1; + } + + if (undef_tbl->num_entries == 0) return 0; + dln_errno = DLN_EBADLIB; + + if (lib[0] == '-' && lib[1] == 'l') { + char *p = alloca(strlen(lib) + 4); + sprintf(p, "lib%s.a", lib+2); + lib = p; + } + + /* library search path: */ + /* look for environment variable DLN_LIBRARY_PATH first. */ + /* then variable dln_librrb_ary_path. */ + /* if path is still NULL, use "." for path. 
*/ + path = getenv("DLN_LIBRARY_PATH"); + if (path == NULL) path = dln_librrb_ary_path; + + file = dln_find_file(lib, path); + fd = open(file, O_RDONLY); + if (fd == -1) goto syserr; + size = read(fd, armagic, SARMAG); + if (size == -1) goto syserr; + + if (size != SARMAG) { + dln_errno = DLN_ENOTLIB; + goto badlib; + } + size = read(fd, &ahdr, sizeof(ahdr)); + if (size == -1) goto syserr; + if (size != sizeof(ahdr) || sscanf(ahdr.ar_size, "%d", &size) != 1) { + goto badlib; + } + + if (strncmp(ahdr.ar_name, "__.SYMDEF", 9) == 0) { + /* make hash table from __.SYMDEF */ + + lib_tbl = st_init_strtable(); + data = (int*)xmalloc(size); + if (data == NULL) goto syserr; + size = read(fd, data, size); + nsym = *data / sizeof(struct symdef); + base = (struct symdef*)(data + 1); + name_base = (char*)(base + nsym) + sizeof(int); + while (nsym > 0) { + char *name = name_base + base->rb_str_index; + + st_insert(lib_tbl, name, base->lib_offset + sizeof(ahdr)); + nsym--; + base++; + } + for (;;) { + target_offset = -1; + st_foreach(undef_tbl, search_undef, lib_tbl); + if (target_offset == -1) break; + if (load_1(fd, target_offset, 0) == -1) { + st_free_table(lib_tbl); + free(data); + goto badlib; + } + if (undef_tbl->num_entries == 0) break; + } + free(data); + st_free_table(lib_tbl); + } + else { + /* linear library, need to scan (FUTURE) */ + + for (;;) { + int offset = SARMAG; + int found = 0; + struct exec hdr; + struct nlist *syms, *sym, *end; + + while (undef_tbl->num_entries > 0) { + found = 0; + lseek(fd, offset, 0); + size = read(fd, &ahdr, sizeof(ahdr)); + if (size == -1) goto syserr; + if (size == 0) break; + if (size != sizeof(ahdr) + || sscanf(ahdr.ar_size, "%d", &size) != 1) { + goto badlib; + } + offset += sizeof(ahdr); + if (load_header(fd, &hdr, offset) == -1) + goto badlib; + syms = load_sym(fd, &hdr, offset); + if (syms == NULL) goto badlib; + sym = syms; + end = syms + (hdr.a_syms / sizeof(struct nlist)); + while (sym < end) { + if (sym->n_type == 
N_EXT|N_TEXT + && st_lookup(undef_tbl, sym->n_un.n_name, NULL)) { + break; + } + sym++; + } + if (sym < end) { + found++; + free(syms); + if (load_1(fd, offset, 0) == -1) { + goto badlib; + } + } + offset += size; + if (offset & 1) offset++; + } + if (found) break; + } + } + close(fd); + return 0; + + syserr: + dln_errno = errno; + badlib: + if (fd >= 0) close(fd); + return -1; +} + +static int +load(file) + const char *file; +{ + int fd; + int result; + + if (dln_init_p == 0) { + if (dln_init(dln_argv0) == -1) return -1; + } + result = strlen(file); + if (file[result-1] == 'a') { + return load_lib(file); + } + + fd = open(file, O_RDONLY); + if (fd == -1) { + dln_errno = errno; + return -1; + } + result = load_1(fd, 0, file); + close(fd); + + return result; +} + +void* +dln_sym(name) + const char *name; +{ + struct nlist *sym; + + if (st_lookup(sym_tbl, name, &sym)) + return (void*)sym->n_value; + return NULL; +} + +#endif /* USE_DLN_A_OUT */ + +#ifdef USE_DLN_DLOPEN +# include <dlfcn.h> +#endif + +#ifdef __hpux +#include <errno.h> +#include "dl.h" +#endif + +#if defined(_AIX) +#include <ctype.h> /* for isdigit() */ +#include <errno.h> /* for global errno */ +#include <sys/ldr.h> +#endif + +#ifdef NeXT +#if NS_TARGET_MAJOR < 4 +#include <mach-o/rld.h> +#else +#include <mach-o/dyld.h> +#ifndef NSLINKMODULE_OPTION_BINDNOW +#define NSLINKMODULE_OPTION_BINDNOW 1 +#endif +#endif +#else +#ifdef __APPLE__ +#include <mach-o/dyld.h> +#endif +#endif + +#if defined _WIN32 && !defined __CYGWIN__ +#include <windows.h> +#endif + +#ifdef _WIN32_WCE +#undef FormatMessage +#define FormatMessage FormatMessageA +#undef LoadLibrary +#define LoadLibrary LoadLibraryA +#undef GetProcAddress +#define GetProcAddress GetProcAddressA +#endif + +static const char * +dln_strerror() +{ +#ifdef USE_DLN_A_OUT + char *strerror(); + + switch (dln_errno) { + case DLN_ECONFL: + return "Symbol name conflict"; + case DLN_ENOINIT: + return "No initializer given"; + case DLN_EUNDEF: + return "Unresolved 
#if defined(_AIX) && ! defined(_IA64)
/*
 * Build a readable message for a failed AIX load() of `pathname` and raise
 * it through rb_loaderror().
 *
 * The message starts with "load failed - <pathname> ", followed by one line
 * per entry returned by loadquery(): the text registered in load_errtab for
 * the entry's leading error number, then the rest of the entry.
 *
 * Fixes over the previous version:
 *   - the table lookup indexes load_errtab with the inner loop variable `j`
 *     (it used `i`, the message index, so the wrong row was tested);
 *   - the assembled text is passed as an argument to a "%s" format instead
 *     of being used as the format string itself;
 *   - isdigit() receives an unsigned char value.
 */
static void
aix_loaderror(const char *pathname)
{
    char *message[8], errbuf[1024];
    int i,j;

    struct errtab {
        int errnum;
        char *errstr;
    } load_errtab[] = {
        {L_ERROR_TOOMANY,       "too many errors, rest skipped."},
        {L_ERROR_NOLIB,         "can't load library:"},
        {L_ERROR_UNDEF,         "can't find symbol in library:"},
        {L_ERROR_RLDBAD,
             "RLD index out of range or bad relocation type:"},
        {L_ERROR_FORMAT,        "not a valid, executable xcoff file:"},
        {L_ERROR_MEMBER,
             "file not an archive or does not contain requested member:"},
        {L_ERROR_TYPE,          "symbol table mismatch:"},
        {L_ERROR_ALIGN,         "text alignment in file is wrong."},
        {L_ERROR_SYSTEM,        "System error:"},
        {L_ERROR_ERRNO,         NULL}
    };

#define LOAD_ERRTAB_LEN (sizeof(load_errtab)/sizeof(load_errtab[0]))
#define ERRBUF_APPEND(s) strncat(errbuf, s, sizeof(errbuf)-strlen(errbuf)-1)

    snprintf(errbuf, sizeof(errbuf), "load failed - %s ", pathname);

    /* NOTE(review): the request code 1 is presumably L_GETMESSAGES, and
     * message[] is walked below regardless of this result -- confirm
     * loadquery()'s return convention against the AIX documentation. */
    if (!loadquery(1, &message[0], sizeof(message)))
        ERRBUF_APPEND(strerror(errno));
    for(i = 0; message[i] && *message[i]; i++) {
        int nerr = atoi(message[i]);
        for (j=0; j<LOAD_ERRTAB_LEN; j++) {
            if (nerr == load_errtab[j].errnum && load_errtab[j].errstr)
                ERRBUF_APPEND(load_errtab[j].errstr);
        }
        /* skip the numeric prefix, keep the descriptive remainder */
        while (isdigit((unsigned char)*message[i])) message[i]++;
        ERRBUF_APPEND(message[i]);
        ERRBUF_APPEND("\n");
    }
    errbuf[strlen(errbuf)-1] = '\0';    /* trim off last newline */
    rb_loaderror("%s", errbuf);
    return;
}
#endif
+#define DLN_DEFINED + { + shl_t lib = NULL; + int flags; + void (*init_fct)(); + + flags = BIND_DEFERRED; + lib = shl_load(file, flags, 0); + if (lib == NULL) { + extern int errno; + rb_loaderror("%s - %s", strerror(errno), file); + } + shl_findsym(&lib, buf, TYPE_PROCEDURE, (void*)&init_fct); + if (init_fct == NULL) { + shl_findsym(&lib, buf, TYPE_UNDEFINED, (void*)&init_fct); + if (init_fct == NULL) { + errno = ENOSYM; + rb_loaderror("%s - %s", strerror(ENOSYM), file); + } + } + (*init_fct)(); + return (void*)lib; + } +#endif /* hpux */ + +#if defined(_AIX) && ! defined(_IA64) +#define DLN_DEFINED + { + void (*init_fct)(); + + init_fct = (void(*)())load((char*)file, 1, 0); + if (init_fct == NULL) { + aix_loaderror(file); + } + if (loadbind(0, (void*)dln_load, (void*)init_fct) == -1) { + aix_loaderror(file); + } + (*init_fct)(); + return (void*)init_fct; + } +#endif /* _AIX */ + +#if defined(NeXT) || defined(__APPLE__) +#define DLN_DEFINED +/*---------------------------------------------------- + By SHIROYAMA Takayuki Psi@fortune.nest.or.jp + + Special Thanks... + Yu tomoak-i@is.aist-nara.ac.jp, + Mi hisho@tasihara.nest.or.jp, + sunshine@sunshineco.com, + and... 
Miss ARAI Akino(^^;) + ----------------------------------------------------*/ +#if defined(NeXT) && (NS_TARGET_MAJOR < 4)/* NeXTSTEP rld functions */ + + { + NXStream* s; + unsigned long init_address; + char *object_files[2] = {NULL, NULL}; + + void (*init_fct)(); + + object_files[0] = (char*)file; + + s = NXOpenFile(2,NX_WRITEONLY); + + /* Load object file, if return value ==0 , load failed*/ + if(rld_load(s, NULL, object_files, NULL) == 0) { + NXFlush(s); + NXClose(s); + rb_loaderror("Failed to load %.200s", file); + } + + /* lookup the initial function */ + if(rld_lookup(s, buf, &init_address) == 0) { + NXFlush(s); + NXClose(s); + rb_loaderror("Failed to lookup Init function %.200s", file); + } + + NXFlush(s); + NXClose(s); + + /* Cannot call *init_address directory, so copy this value to + funtion pointer */ + init_fct = (void(*)())init_address; + (*init_fct)(); + return (void*)init_address; + } +#else/* OPENSTEP dyld functions */ + { + int dyld_result; + NSObjectFileImage obj_file; /* handle, but not use it */ + /* "file" is module file name . + "buf" is pointer to initial function name with "_" . 
*/ + + void (*init_fct)(); + + + dyld_result = NSCreateObjectFileImageFromFile(file, &obj_file); + + if (dyld_result != NSObjectFileImageSuccess) { + rb_loaderror("Failed to load %.200s", file); + } + + NSLinkModule(obj_file, file, NSLINKMODULE_OPTION_BINDNOW); + + /* lookup the initial function */ + if(!NSIsSymbolNameDefined(buf)) { + rb_loaderror("Failed to lookup Init function %.200s",file); + } + init_fct = NSAddressOfSymbol(NSLookupAndBindSymbol(buf)); + (*init_fct)(); + + return (void*)init_fct; + } +#endif /* rld or dyld */ +#endif + +#ifdef __BEOS__ +# define DLN_DEFINED + { + status_t err_stat; /* BeOS error status code */ + image_id img_id; /* extention module unique id */ + void (*init_fct)(); /* initialize function for extention module */ + + /* load extention module */ + img_id = load_add_on(file); + if (img_id <= 0) { + rb_loaderror("Failed to load %.200s", file); + } + + /* find symbol for module initialize function. */ + /* The Be Book KernelKit Images section described to use + B_SYMBOL_TYPE_TEXT for symbol of function, not + B_SYMBOL_TYPE_CODE. Why ? */ + /* strcat(init_fct_symname, "__Fv"); */ /* parameter nothing. */ + /* "__Fv" dont need! The Be Book Bug ? */ + err_stat = get_image_symbol(img_id, buf, + B_SYMBOL_TYPE_TEXT, (void **)&init_fct); + + if (err_stat != B_NO_ERROR) { + char real_name[MAXPATHLEN]; + + strcpy(real_name, buf); + strcat(real_name, "__Fv"); + err_stat = get_image_symbol(img_id, real_name, + B_SYMBOL_TYPE_TEXT, (void **)&init_fct); + } + + if ((B_BAD_IMAGE_ID == err_stat) || (B_BAD_INDEX == err_stat)) { + unload_add_on(img_id); + rb_loaderror("Failed to lookup Init function %.200s", file); + } + else if (B_NO_ERROR != err_stat) { + char errmsg[] = "Internal of BeOS version. %.200s (symbol_name = %s)"; + unload_add_on(img_id); + rb_loaderror(errmsg, strerror(err_stat), buf); + } + + /* call module initialize function. 
*/ + (*init_fct)(); + return (void*)img_id; + } +#endif /* __BEOS__*/ + +#ifdef __MACOS__ +# define DLN_DEFINED + { + OSErr err; + FSSpec libspec; + CFragConnectionID connID; + Ptr mainAddr; + char errMessage[1024]; + Boolean isfolder, didsomething; + Str63 fragname; + Ptr symAddr; + CFragSymbolClass class; + void (*init_fct)(); + char fullpath[MAXPATHLEN]; + + strcpy(fullpath, file); + + /* resolve any aliases to find the real file */ + c2pstr(fullpath); + (void)FSMakeFSSpec(0, 0, fullpath, &libspec); + err = ResolveAliasFile(&libspec, 1, &isfolder, &didsomething); + if (err) { + rb_loaderror("Unresolved Alias - %s", file); + } + + /* Load the fragment (or return the connID if it is already loaded */ + fragname[0] = 0; + err = GetDiskFragment(&libspec, 0, 0, fragname, + kLoadCFrag, &connID, &mainAddr, + errMessage); + if (err) { + p2cstr(errMessage); + rb_loaderror("%s - %s",errMessage , file); + } + + /* Locate the address of the correct init function */ + c2pstr(buf); + err = FindSymbol(connID, buf, &symAddr, &class); + if (err) { + rb_loaderror("Unresolved symbols - %s" , file); + } + init_fct = (void (*)())symAddr; + (*init_fct)(); + return (void*)init_fct; + } +#endif /* __MACOS__ */ + +#if defined(__VMS) +#define DLN_DEFINED + { + void *handle, (*init_fct)(); + char *fname, *p1, *p2; + + fname = (char *)__alloca(strlen(file)+1); + strcpy(fname,file); + if (p1 = strrchr(fname,'/')) + fname = p1 + 1; + if (p2 = strrchr(fname,'.')) + *p2 = '\0'; + + if ((handle = (void*)dlopen(fname, 0)) == NULL) { + error = dln_strerror(); + goto failed; + } + + if ((init_fct = (void (*)())dlsym(handle, buf)) == NULL) { + error = DLN_ERROR(); + dlclose(handle); + goto failed; + } + /* Call the init code */ + (*init_fct)(); + return handle; + } +#endif /* __VMS */ + +#ifndef DLN_DEFINED + rb_notimplement(); +#endif + +#endif /* USE_DLN_A_OUT */ +#endif +#if !defined(_AIX) && !defined(NeXT) + failed: + rb_loaderror("%s - %s", error, file); +#endif + +#endif /* NO_DLN_LOAD */ + 
return 0; /* dummy return */ +} + +static char *dln_find_1(); + +char * +dln_find_exe(fname, path) + const char *fname; + const char *path; +{ + if (!path) { + path = getenv(PATH_ENV); + } + + if (!path) { +#if defined(MSDOS) || defined(_WIN32) || defined(__human68k__) || defined(__MACOS__) + path = "/usr/local/bin;/usr/ucb;/usr/bin;/bin;."; +#else + path = "/usr/local/bin:/usr/ucb:/usr/bin:/bin:."; +#endif + } + return dln_find_1(fname, path, 1); +} + +char * +dln_find_file(fname, path) + const char *fname; + const char *path; +{ +#ifndef __MACOS__ + if (!path) path = "."; + return dln_find_1(fname, path, 0); +#else + if (!path) path = "."; + return _macruby_path_conv_posix_to_macos(dln_find_1(fname, path, 0)); +#endif +} + +#if defined(__CYGWIN32__) +const char * +conv_to_posix_path(win32, posix, len) + char *win32; + char *posix; + int len; +{ + char *first = win32; + char *p = win32; + char *dst = posix; + + for (p = win32; *p; p++) + if (*p == ';') { + *p = 0; + cygwin32_conv_to_posix_path(first, posix); + posix += strlen(posix); + *posix++ = ':'; + first = p + 1; + *p = ';'; + } + if (len < strlen(first)) + fprintf(stderr, "PATH length too long: %s\n", first); + else + cygwin32_conv_to_posix_path(first, posix); + return dst; +} +#endif + +static char fbuf[MAXPATHLEN]; + +static char * +dln_find_1(fname, path, exe_flag) + char *fname; + char *path; + int exe_flag; /* non 0 if looking for executable. 
*/ +{ + register char *dp; + register char *ep; + register char *bp; + struct stat st; +#ifdef __MACOS__ + const char* mac_fullpath; +#endif + + if (!fname) return fname; + if (fname[0] == '/') return fname; + if (strncmp("./", fname, 2) == 0 || strncmp("../", fname, 3) == 0) + return fname; + if (exe_flag && strchr(fname, '/')) return fname; +#ifdef DOSISH + if (fname[0] == '\\') return fname; +# ifdef DOSISH_DRIVE_LETTER + if (strlen(fname) > 2 && fname[1] == ':') return fname; +# endif + if (strncmp(".\\", fname, 2) == 0 || strncmp("..\\", fname, 3) == 0) + return fname; + if (exe_flag && strchr(fname, '\\')) return fname; +#endif + + for (dp = path;; dp = ++ep) { + register int l; + int i; + int fspace; + + /* extract a component */ + ep = strchr(dp, PATH_SEP[0]); + if (ep == NULL) + ep = dp+strlen(dp); + + /* find the length of that component */ + l = ep - dp; + bp = fbuf; + fspace = sizeof fbuf - 2; + if (l > 0) { + /* + ** If the length of the component is zero length, + ** start from the current directory. If the + ** component begins with "~", start from the + ** user's $HOME environment variable. Otherwise + ** take the path literally. 
+ */ + + if (*dp == '~' && (l == 1 || +#if defined(DOSISH) + dp[1] == '\\' || +#endif + dp[1] == '/')) { + char *home; + + home = getenv("HOME"); + if (home != NULL) { + i = strlen(home); + if ((fspace -= i) < 0) + goto toolong; + memcpy(bp, home, i); + bp += i; + } + dp++; + l--; + } + if (l > 0) { + if ((fspace -= l) < 0) + goto toolong; + memcpy(bp, dp, l); + bp += l; + } + + /* add a "/" between directory and filename */ + if (ep[-1] != '/') + *bp++ = '/'; + } + + /* now append the file name */ + i = strlen(fname); + if ((fspace -= i) < 0) { + toolong: + fprintf(stderr, "openpath: pathname too long (ignored)\n"); + *bp = '\0'; + fprintf(stderr, "\tDirectory \"%s\"\n", fbuf); + fprintf(stderr, "\tFile \"%s\"\n", fname); + goto next; + } + memcpy(bp, fname, i + 1); + +#ifndef __MACOS__ + if (stat(fbuf, &st) == 0) { + if (exe_flag == 0) return fbuf; + /* looking for executable */ + if (!S_ISDIR(st.st_mode) && eaccess(fbuf, X_OK) == 0) + return fbuf; + } +#else + if (mac_fullpath = _macruby_exist_file_in_libdir_as_posix_name(fbuf)) { + if (exe_flag == 0) return mac_fullpath; + /* looking for executable */ + if (stat(mac_fullpath, &st) == 0) { + if (!S_ISDIR(st.st_mode) && eaccess(mac_fullpath, X_OK) == 0) + return mac_fullpath; + } + } +#endif +#if defined(DOSISH) + if (exe_flag) { + static const char *extension[] = { +#if defined(MSDOS) + ".com", ".exe", ".bat", +#if defined(DJGPP) + ".btm", ".sh", ".ksh", ".pl", ".sed", +#endif +#elif defined(__EMX__) || defined(_WIN32) + ".exe", ".com", ".cmd", ".bat", +/* end of __EMX__ or _WIN32 */ +#else + ".r", ".R", ".x", ".X", ".bat", ".BAT", +/* __human68k__ */ +#endif + (char *) NULL + }; + int j; + + for (j = 0; extension[j]; j++) { + if (fspace < strlen(extension[j])) { + fprintf(stderr, "openpath: pathname too long (ignored)\n"); + fprintf(stderr, "\tDirectory \"%.*s\"\n", (int) (bp - fbuf), fbuf); + fprintf(stderr, "\tFile \"%s%s\"\n", fname, extension[j]); + continue; + } + strcpy(bp + i, extension[j]); +#ifndef 
__MACOS__ + if (stat(fbuf, &st) == 0) + return fbuf; +#else + if (mac_fullpath = _macruby_exist_file_in_libdir_as_posix_name(fbuf)) + return mac_fullpath; + +#endif + } + } +#endif /* MSDOS or _WIN32 or __human68k__ or __EMX__ */ + + next: + /* if not, and no other alternatives, life is bleak */ + if (*ep == '\0') { + return NULL; + } + + /* otherwise try the next component in the search path */ + } +} +#define NO_DLN_LOAD 1 +#include "dln.c" +void +Init_ext() +{ +} +/********************************************************************** + + enum.c - + + $Author: matz $ + $Date: 2004/10/30 06:56:17 $ + created at: Fri Oct 1 15:15:19 JST 1993 + + Copyright (C) 1993-2003 Yukihiro Matsumoto + +**********************************************************************/ + +#include "ruby.h" +#include "node.h" +#include "util.h" + +VALUE rb_mEnumerable; +static ID id_each, id_eqq, id_cmp; + +VALUE +rb_each(obj) + VALUE obj; +{ + return rb_funcall(obj, id_each, 0, 0); +} + +static VALUE +grep_i(i, arg) + VALUE i, *arg; +{ + if (RTEST(rb_funcall(arg[0], id_eqq, 1, i))) { + rb_ary_push(arg[1], i); + } + return Qnil; +} + +static VALUE +grep_iter_i(i, arg) + VALUE i, *arg; +{ + if (RTEST(rb_funcall(arg[0], id_eqq, 1, i))) { + rb_ary_push(arg[1], rb_yield(i)); + } + return Qnil; +} + +/* + * call-seq: + * enum.grep(pattern) => array + * enum.grep(pattern) {| obj | block } => array + * + * Returns an array of every element in <i>enum</i> for which + * <code>Pattern === element</code>. If the optional <em>block</em> is + * supplied, each matching element is passed to it, and the block's + * result is stored in the output array. 
+ * + * (1..100).grep 38..44 #=> [38, 39, 40, 41, 42, 43, 44] + * c = IO.constants + * c.grep(/SEEK/) #=> ["SEEK_END", "SEEK_SET", "SEEK_CUR"] + * res = c.grep(/SEEK/) {|v| IO.const_get(v) } + * res #=> [2, 0, 1] + * + */ + +static VALUE +enum_grep(obj, pat) + VALUE obj, pat; +{ + VALUE ary = rb_ary_new(); + VALUE arg[2]; + + arg[0] = pat; + arg[1] = ary; + + rb_iterate(rb_each, obj, rb_block_given_p() ? grep_iter_i : grep_i, (VALUE)arg); + + return ary; +} + +static VALUE +find_i(i, memo) + VALUE i; + NODE *memo; +{ + if (RTEST(rb_yield(i))) { + memo->u2.value = Qtrue; + memo->u1.value = i; + rb_iter_break(); + } + return Qnil; +} + +/* + * call-seq: + * enum.detect(ifnone = nil) {| obj | block } => obj or nil + * enum.find(ifnone = nil) {| obj | block } => obj or nil + * + * Passes each entry in <i>enum</i> to <em>block</em>. Returns the + * first for which <em>block</em> is not <code>false</code>. If no + * object matches, calls <i>ifnone</i> and returns its result when it + * is specified, or returns <code>nil</code> + * + * (1..10).detect {|i| i % 5 == 0 and i % 7 == 0 } #=> nil + * (1..100).detect {|i| i % 5 == 0 and i % 7 == 0 } #=> 35 + * + */ + +static VALUE +enum_find(argc, argv, obj) + int argc; + VALUE* argv; + VALUE obj; +{ + NODE *memo = rb_node_newnode(NODE_MEMO, Qnil, Qfalse, 0); + VALUE if_none; + + rb_scan_args(argc, argv, "01", &if_none); + rb_iterate(rb_each, obj, find_i, (VALUE)memo); + if (memo->u2.value) { + return memo->u1.value; + } + if (!NIL_P(if_none)) { + return rb_funcall(if_none, rb_intern("call"), 0, 0); + } + return Qnil; +} + +static VALUE +find_all_i(i, ary) + VALUE i, ary; +{ + if (RTEST(rb_yield(i))) { + rb_ary_push(ary, i); + } + return Qnil; +} + +/* + * call-seq: + * enum.find_all {| obj | block } => array + * enum.select {| obj | block } => array + * + * Returns an array containing all elements of <i>enum</i> for which + * <em>block</em> is not <code>false</code> (see also + * <code>Enumerable#reject</code>). 
+ * + * (1..10).find_all {|i| i % 3 == 0 } #=> [3, 6, 9] + * + */ + +static VALUE +enum_find_all(obj) + VALUE obj; +{ + VALUE ary = rb_ary_new(); + + rb_iterate(rb_each, obj, find_all_i, ary); + + return ary; +} + +static VALUE +reject_i(i, ary) + VALUE i, ary; +{ + if (!RTEST(rb_yield(i))) { + rb_ary_push(ary, i); + } + return Qnil; +} + +/* + * call-seq: + * enum.reject {| obj | block } => array + * + * Returns an array for all elements of <i>enum</i> for which + * <em>block</em> is false (see also <code>Enumerable#find_all</code>). + * + * (1..10).reject {|i| i % 3 == 0 } #=> [1, 2, 4, 5, 7, 8, 10] + * + */ + +static VALUE +enum_reject(obj) + VALUE obj; +{ + VALUE ary = rb_ary_new(); + + rb_iterate(rb_each, obj, reject_i, ary); + + return ary; +} + +static VALUE +collect_i(i, ary) + VALUE i, ary; +{ + rb_ary_push(ary, rb_yield(i)); + + return Qnil; +} + +static VALUE +collect_all(i, ary) + VALUE i, ary; +{ + rb_ary_push(ary, i); + + return Qnil; +} + +/* + * call-seq: + * enum.collect {| obj | block } => array + * enum.map {| obj | block } => array + * + * Returns a new array with the results of running <em>block</em> once + * for every element in <i>enum</i>. + * + * (1..4).collect {|i| i*i } #=> [1, 4, 9, 16] + * (1..4).collect { "cat" } #=> ["cat", "cat", "cat", "cat"] + * + */ + +static VALUE +enum_collect(obj) + VALUE obj; +{ + VALUE ary = rb_ary_new(); + + rb_iterate(rb_each, obj, rb_block_given_p() ? collect_i : collect_all, ary); + + return ary; +} + +/* + * call-seq: + * enum.to_a => array + * enum.entries => array + * + * Returns an array containing the items in <i>enum</i>. 
+ * + * (1..7).to_a #=> [1, 2, 3, 4, 5, 6, 7] + * { 'a'=>1, 'b'=>2, 'c'=>3 }.to_a #=> [["a", 1], ["b", 2], ["c", 3]] + */ +static VALUE +enum_to_a(obj) + VALUE obj; +{ + VALUE ary = rb_ary_new(); + + rb_iterate(rb_each, obj, collect_all, ary); + + return ary; +} + +static VALUE +inject_i(i, memo) + VALUE i; + NODE *memo; +{ + if (memo->u2.value) { + memo->u2.value = Qfalse; + memo->u1.value = i; + } + else { + memo->u1.value = rb_yield_values(2, memo->u1.value, i); + } + return Qnil; +} + +/* + * call-seq: + * enum.inject(initial) {| memo, obj | block } => obj + * enum.inject {| memo, obj | block } => obj + * + * Combines the elements of <i>enum</i> by applying the block to an + * accumulator value (<i>memo</i>) and each element in turn. At each + * step, <i>memo</i> is set to the value returned by the block. The + * first form lets you supply an initial value for <i>memo</i>. The + * second form uses the first element of the collection as a the + * initial value (and skips that element while iterating). + * + * # Sum some numbers + * (5..10).inject {|sum, n| sum + n } #=> 45 + * # Multiply some numbers + * (5..10).inject(1) {|product, n| product * n } #=> 151200 + * + * # find the longest word + * longest = %w{ cat sheep bear }.inject do |memo,word| + * memo.length > word.length ? memo : word + * end + * longest #=> "sheep" + * + * # find the length of the longest word + * longest = %w{ cat sheep bear }.inject(0) do |memo,word| + * memo >= word.length ? 
memo : word.length + * end + * longest #=> 5 + * + */ + +static VALUE +enum_inject(argc, argv, obj) + int argc; + VALUE *argv, obj; +{ + NODE *memo; + VALUE n; + + if (rb_scan_args(argc, argv, "01", &n) == 1) { + memo = rb_node_newnode(NODE_MEMO, n, Qfalse, 0); + } + else { + memo = rb_node_newnode(NODE_MEMO, Qnil, Qtrue, 0); + } + rb_iterate(rb_each, obj, inject_i, (VALUE)memo); + n = memo->u1.value; + return n; +} + +static VALUE +partition_i(i, ary) + VALUE i, *ary; +{ + if (RTEST(rb_yield(i))) { + rb_ary_push(ary[0], i); + } + else { + rb_ary_push(ary[1], i); + } + return Qnil; +} + +/* + * call-seq: + * enum.partition {| obj | block } => [ true_array, false_array ] + * + * Returns two arrays, the first containing the elements of + * <i>enum</i> for which the block evaluates to true, the second + * containing the rest. + * + * (1..6).partition {|i| (i&1).zero?} #=> [[2, 4, 6], [1, 3, 5]] + * + */ + +static VALUE +enum_partition(obj) + VALUE obj; +{ + VALUE ary[2]; + + ary[0] = rb_ary_new(); + ary[1] = rb_ary_new(); + rb_iterate(rb_each, obj, partition_i, (VALUE)ary); + + return rb_assoc_new(ary[0], ary[1]); +} + +/* + * call-seq: + * enum.sort => array + * enum.sort {| a, b | block } => array + * + * Returns an array containing the items in <i>enum</i> sorted, + * either according to their own <code><=></code> method, or by using + * the results of the supplied block. The block should return -1, 0, or + * +1 depending on the comparison between <i>a</i> and <i>b</i>. As of + * Ruby 1.8, the method <code>Enumerable#sort_by</code> implements a + * built-in Schwartzian Transform, useful when key computation or + * comparison is expensive.. 
+ * + * %w(rhea kea flea).sort #=> ["flea", "kea", "rhea"] + * (1..10).sort {|a,b| b <=> a} #=> [10, 9, 8, 7, 6, 5, 4, 3, 2, 1] + */ + +static VALUE +enum_sort(obj) + VALUE obj; +{ + return rb_ary_sort(enum_to_a(obj)); +} + +static VALUE +sort_by_i(i, ary) + VALUE i, ary; +{ + VALUE v; + NODE *memo; + + v = rb_yield(i); + if (RBASIC(ary)->klass) { + rb_raise(rb_eRuntimeError, "sort_by reentered"); + } + memo = rb_node_newnode(NODE_MEMO, v, i, 0); + rb_ary_push(ary, (VALUE)memo); + return Qnil; +} + +static int +sort_by_cmp(aa, bb) + NODE **aa, **bb; +{ + VALUE a = aa[0]->u1.value; + VALUE b = bb[0]->u1.value; + + return rb_cmpint(rb_funcall(a, id_cmp, 1, b), a, b); +} + +/* + * call-seq: + * enum.sort_by {| obj | block } => array + * + * Sorts <i>enum</i> using a set of keys generated by mapping the + * values in <i>enum</i> through the given block. + * + * %w{ apple pear fig }.sort_by {|word| word.length} + #=> ["fig", "pear", "apple"] + * + * The current implementation of <code>sort_by</code> generates an + * array of tuples containing the original collection element and the + * mapped value. This makes <code>sort_by</code> fairly expensive when + * the keysets are simple + * + * require 'benchmark' + * include Benchmark + * + * a = (1..100000).map {rand(100000)} + * + * bm(10) do |b| + * b.report("Sort") { a.sort } + * b.report("Sort by") { a.sort_by {|a| a} } + * end + * + * <em>produces:</em> + * + * user system total real + * Sort 0.180000 0.000000 0.180000 ( 0.175469) + * Sort by 1.980000 0.040000 2.020000 ( 2.013586) + * + * However, consider the case where comparing the keys is a non-trivial + * operation. The following code sorts some files on modification time + * using the basic <code>sort</code> method. + * + * files = Dir["*"] + * sorted = files.sort {|a,b| File.new(a).mtime <=> File.new(b).mtime} + * sorted #=> ["mon", "tues", "wed", "thurs"] + * + * This sort is inefficient: it generates two new <code>File</code> + * objects during every comparison. 
A slightly better technique is to + * use the <code>Kernel#test</code> method to generate the modification + * times directly. + * + * files = Dir["*"] + * sorted = files.sort { |a,b| + * test(?M, a) <=> test(?M, b) + * } + * sorted #=> ["mon", "tues", "wed", "thurs"] + * + * This still generates many unnecessary <code>Time</code> objects. A + * more efficient technique is to cache the sort keys (modification + * times in this case) before the sort. Perl users often call this + * approach a Schwartzian Transform, after Randal Schwartz. We + * construct a temporary array, where each element is an array + * containing our sort key along with the filename. We sort this array, + * and then extract the filename from the result. + * + * sorted = Dir["*"].collect { |f| + * [test(?M, f), f] + * }.sort.collect { |f| f[1] } + * sorted #=> ["mon", "tues", "wed", "thurs"] + * + * This is exactly what <code>sort_by</code> does internally. + * + * sorted = Dir["*"].sort_by {|f| test(?M, f)} + * sorted #=> ["mon", "tues", "wed", "thurs"] + */ + +static VALUE +enum_sort_by(obj) + VALUE obj; +{ + VALUE ary; + long i; + + if (TYPE(obj) == T_ARRAY) { + ary = rb_ary_new2(RARRAY(obj)->len); + } + else { + ary = rb_ary_new(); + } + RBASIC(ary)->klass = 0; + rb_iterate(rb_each, obj, sort_by_i, ary); + if (RARRAY(ary)->len > 1) { + qsort(RARRAY(ary)->ptr, RARRAY(ary)->len, sizeof(VALUE), sort_by_cmp, 0); + } + if (RBASIC(ary)->klass) { + rb_raise(rb_eRuntimeError, "sort_by reentered"); + } + for (i=0; i<RARRAY(ary)->len; i++) { + RARRAY(ary)->ptr[i] = RNODE(RARRAY(ary)->ptr[i])->u2.value; + } + RBASIC(ary)->klass = rb_cArray; + return ary; +} + +static VALUE +all_iter_i(i, memo) + VALUE i; + NODE *memo; +{ + if (!RTEST(rb_yield(i))) { + memo->u1.value = Qfalse; + rb_iter_break(); + } + return Qnil; +} + +static VALUE +all_i(i, memo) + VALUE i; + NODE *memo; +{ + if (!RTEST(i)) { + memo->u1.value = Qfalse; + rb_iter_break(); + } + return Qnil; +} + +/* + * call-seq: + * enum.all? 
[{|obj| block } ] => true or false + * + * Passes each element of the collection to the given block. The method + * returns <code>true</code> if the block never returns + * <code>false</code> or <code>nil</code>. If the block is not given, + * Ruby adds an implicit block of <code>{|obj| obj}</code> (that is + * <code>all?</code> will return <code>true</code> only if none of the + * collection members are <code>false</code> or <code>nil</code>.) + * + * %w{ ant bear cat}.all? {|word| word.length >= 3} #=> true + * %w{ ant bear cat}.all? {|word| word.length >= 4} #=> false + * [ nil, true, 99 ].all? #=> false + * + */ + +static VALUE +enum_all(obj) + VALUE obj; +{ + VALUE result; + NODE *memo = rb_node_newnode(NODE_MEMO, Qnil, 0, 0); + + memo->u1.value = Qtrue; + rb_iterate(rb_each, obj, rb_block_given_p() ? all_iter_i : all_i, (VALUE)memo); + result = memo->u1.value; + return result; +} + +static VALUE +any_iter_i(i, memo) + VALUE i; + NODE *memo; +{ + if (RTEST(rb_yield(i))) { + memo->u1.value = Qtrue; + rb_iter_break(); + } + return Qnil; +} + +static VALUE +any_i(i, memo) + VALUE i; + NODE *memo; +{ + if (RTEST(i)) { + memo->u1.value = Qtrue; + rb_iter_break(); + } + return Qnil; +} + +/* + * call-seq: + * enum.any? [{|obj| block } ] => true or false + * + * Passes each element of the collection to the given block. The method + * returns <code>true</code> if the block ever returns a value other + * that <code>false</code> or <code>nil</code>. If the block is not + * given, Ruby adds an implicit block of <code>{|obj| obj}</code> (that + * is <code>any?</code> will return <code>true</code> if at least one + * of the collection members is not <code>false</code> or + * <code>nil</code>. + * + * %w{ ant bear cat}.any? {|word| word.length >= 3} #=> true + * %w{ ant bear cat}.any? {|word| word.length >= 4} #=> true + * [ nil, true, 99 ].any? 
#=> true + * + */ + +static VALUE +enum_any(obj) + VALUE obj; +{ + VALUE result; + NODE *memo = rb_node_newnode(NODE_MEMO, Qnil, 0, 0); + + memo->u1.value = Qfalse; + rb_iterate(rb_each, obj, rb_block_given_p() ? any_iter_i : any_i, (VALUE)memo); + result = memo->u1.value; + return result; +} + +static VALUE +min_i(i, memo) + VALUE i; + NODE *memo; +{ + VALUE cmp; + + if (NIL_P(memo->u1.value)) { + memo->u1.value = i; + } + else { + cmp = rb_funcall(i, id_cmp, 1, memo->u1.value); + if (rb_cmpint(cmp, i, memo->u1.value) < 0) { + memo->u1.value = i; + } + } + return Qnil; +} + +static VALUE +min_ii(i, memo) + VALUE i; + NODE *memo; +{ + VALUE cmp; + + if (NIL_P(memo->u1.value)) { + memo->u1.value = i; + } + else { + cmp = rb_yield_values(2, i, memo->u1.value); + if (rb_cmpint(cmp, i, memo->u1.value) < 0) { + memo->u1.value = i; + } + } + return Qnil; +} + + +/* + * call-seq: + * enum.min => obj + * enum.min {| a,b | block } => obj + * + * Returns the object in <i>enum</i> with the minimum value. The + * first form assumes all objects implement <code>Comparable</code>; + * the second uses the block to return <em>a <=> b</em>. + * + * a = %w(albatross dog horse) + * a.min #=> "albatross" + * a.min {|a,b| a.length <=> b.length } #=> "dog" + */ + +static VALUE +enum_min(obj) + VALUE obj; +{ + VALUE result; + NODE *memo = rb_node_newnode(NODE_MEMO, Qnil, 0, 0); + + rb_iterate(rb_each, obj, rb_block_given_p() ? 
min_ii : min_i, (VALUE)memo); + result = memo->u1.value; + return result; +} + +static VALUE +max_i(i, memo) + VALUE i; + NODE *memo; +{ + VALUE cmp; + + if (NIL_P(memo->u1.value)) { + memo->u1.value = i; + } + else { + cmp = rb_funcall(i, id_cmp, 1, memo->u1.value); + if (rb_cmpint(cmp, i, memo->u1.value) > 0) { + memo->u1.value = i; + } + } + return Qnil; +} + +static VALUE +max_ii(i, memo) + VALUE i; + NODE *memo; +{ + VALUE cmp; + + if (NIL_P(memo->u1.value)) { + memo->u1.value = i; + } + else { + cmp = rb_yield_values(2, i, memo->u1.value); + if (rb_cmpint(cmp, i, memo->u1.value) > 0) { + memo->u1.value = i; + } + } + return Qnil; +} + +/* + * call-seq: + * enum.max => obj + * enum.max {|a,b| block } => obj + * + * Returns the object in _enum_ with the maximum value. The + * first form assumes all objects implement <code>Comparable</code>; + * the second uses the block to return <em>a <=> b</em>. + * + * a = %w(albatross dog horse) + * a.max #=> "horse" + * a.max {|a,b| a.length <=> b.length } #=> "albatross" + */ + +static VALUE +enum_max(obj) + VALUE obj; +{ + VALUE result; + NODE *memo = rb_node_newnode(NODE_MEMO, Qnil, 0, 0); + + rb_iterate(rb_each, obj, rb_block_given_p() ? max_ii : max_i, (VALUE)memo); + result = memo->u1.value; + return result; +} + +static VALUE +min_by_i(i, memo) + VALUE i; + NODE *memo; +{ + VALUE v; + + v = rb_yield(i); + if (NIL_P(memo->u1.value)) { + memo->u1.value = v; + memo->u2.value = i; + } + else if (rb_cmpint(rb_funcall(v, id_cmp, 1, memo->u1.value), v, memo->u1.value) < 0) { + memo->u1.value = v; + memo->u2.value = i; + } + return Qnil; +} + +/* + * call-seq: + * enum.min_by {| obj| block } => obj + * + * Returns the object in <i>enum</i> that gives the minimum + * value from the given block. 
+ * + * a = %w(albatross dog horse) + * a.min_by {|x| x.length } #=> "dog" + */ + +static VALUE +enum_min_by(obj) + VALUE obj; +{ + VALUE result; + NODE *memo = rb_node_newnode(NODE_MEMO, Qnil, 0, 0); + + rb_iterate(rb_each, obj, min_by_i, (VALUE)memo); + result = memo->u2.value; + return result; +} + +static VALUE +max_by_i(i, memo) + VALUE i; + NODE *memo; +{ + VALUE v; + + v = rb_yield(i); + if (NIL_P(memo->u1.value)) { + memo->u1.value = v; + memo->u2.value = i; + } + else if (rb_cmpint(rb_funcall(v, id_cmp, 1, memo->u1.value), v, memo->u1.value) > 0) { + memo->u1.value = v; + memo->u2.value = i; + } + return Qnil; +} + +/* + * call-seq: + * enum.max_by {| obj| block } => obj + * + * Returns the object in <i>enum</i> that gives the maximum + * value from the given block. + * + * a = %w(albatross dog horse) + * a.max_by {|x| x.length } #=> "albatross" + */ + +static VALUE +enum_max_by(obj) + VALUE obj; +{ + VALUE result; + NODE *memo = rb_node_newnode(NODE_MEMO, Qnil, 0, 0); + + rb_iterate(rb_each, obj, max_by_i, (VALUE)memo); + result = memo->u2.value; + return result; +} + +static VALUE +member_i(item, memo) + VALUE item; + NODE *memo; +{ + if (rb_equal(item, memo->u1.value)) { + memo->u2.value = Qtrue; + rb_iter_break(); + } + return Qnil; +} + +/* + * call-seq: + * enum.include?(obj) => true or false + * enum.member?(obj) => true or false + * + * Returns <code>true</code> if any member of <i>enum</i> equals + * <i>obj</i>. Equality is tested using <code>==</code>. + * + * IO.constants.include? "SEEK_SET" #=> true + * IO.constants.include? 
"SEEK_NO_FURTHER" #=> false + * + */ + +static VALUE +enum_member(obj, val) + VALUE obj, val; +{ + VALUE result; + NODE *memo = rb_node_newnode(NODE_MEMO, val, Qfalse, 0); + + rb_iterate(rb_each, obj, member_i, (VALUE)memo); + result = memo->u2.value; + return result; +} + +static VALUE +each_with_index_i(val, memo) + VALUE val; + NODE *memo; +{ + rb_yield_values(2, val, INT2FIX(memo->u3.cnt)); + memo->u3.cnt++; + return Qnil; +} + +/* + * call-seq: + * enum.each_with_index {|obj, i| block } -> enum + * + * Calls <em>block</em> with two arguments, the item and its index, for + * each item in <i>enum</i>. + * + * hash = Hash.new + * %w(cat dog wombat).each_with_index {|item, index| + * hash[item] = index + * } + * hash #=> {"cat"=>0, "wombat"=>2, "dog"=>1} + * + */ + +static VALUE +enum_each_with_index(obj) + VALUE obj; +{ + NODE *memo = rb_node_newnode(NODE_MEMO, 0, 0, 0); + + rb_iterate(rb_each, obj, each_with_index_i, (VALUE)memo); + return obj; +} + +static VALUE +zip_i(val, memo) + VALUE val; + NODE *memo; +{ + VALUE result = memo->u1.value; + VALUE args = memo->u2.value; + int idx = memo->u3.cnt++; + VALUE tmp; + int i; + + tmp = rb_ary_new2(RARRAY(args)->len + 1); + rb_ary_store(tmp, 0, val); + for (i=0; i<RARRAY(args)->len; i++) { + rb_ary_push(tmp, rb_ary_entry(RARRAY(args)->ptr[i], idx)); + } + if (rb_block_given_p()) { + rb_yield(tmp); + } + else { + rb_ary_push(result, tmp); + } + return Qnil; +} + +/* + * call-seq: + * enum.zip(arg, ...) => array + * enum.zip(arg, ...) {|arr| block } => nil + * + * Converts any arguments to arrays, then merges elements of + * <i>enum</i> with corresponding elements from each argument. This + * generates a sequence of <code>enum#size</code> <em>n</em>-element + * arrays, where <em>n</em> is one more that the count of arguments. If + * the size of any argument is less than <code>enum#size</code>, + * <code>nil</code> values are supplied. 
If a block given, it is + * invoked for each output array, otherwise an array of arrays is + * returned. + * + * a = [ 4, 5, 6 ] + * b = [ 7, 8, 9 ] + * + * (1..3).zip(a, b) #=> [[1, 4, 7], [2, 5, 8], [3, 6, 9]] + * "cat\ndog".zip([1]) #=> [["cat\n", 1], ["dog", nil]] + * (1..3).zip #=> [[1], [2], [3]] + * + */ + +static VALUE +enum_zip(argc, argv, obj) + int argc; + VALUE *argv; + VALUE obj; +{ + int i; + VALUE result; + NODE *memo; + + for (i=0; i<argc; i++) { + argv[i] = rb_convert_type(argv[i], T_ARRAY, "Array", "to_a"); + } + result = rb_block_given_p() ? Qnil : rb_ary_new(); + memo = rb_node_newnode(NODE_MEMO, result, rb_ary_new4(argc, argv), 0); + rb_iterate(rb_each, obj, zip_i, (VALUE)memo); + + return result; +} + +/* + * The <code>Enumerable</code> mixin provides collection classes with + * several traversal and searching methods, and with the ability to + * sort. The class must provide a method <code>each</code>, which + * yields successive members of the collection. If + * <code>Enumerable#max</code>, <code>#min</code>, or + * <code>#sort</code> is used, the objects in the collection must also + * implement a meaningful <code><=></code> operator, as these methods + * rely on an ordering between members of the collection. 
+ */ + +void +Init_Enumerable() +{ + rb_mEnumerable = rb_define_module("Enumerable"); + + rb_define_method(rb_mEnumerable,"to_a", enum_to_a, 0); + rb_define_method(rb_mEnumerable,"entries", enum_to_a, 0); + + rb_define_method(rb_mEnumerable,"sort", enum_sort, 0); + rb_define_method(rb_mEnumerable,"sort_by", enum_sort_by, 0); + rb_define_method(rb_mEnumerable,"grep", enum_grep, 1); + rb_define_method(rb_mEnumerable,"find", enum_find, -1); + rb_define_method(rb_mEnumerable,"detect", enum_find, -1); + rb_define_method(rb_mEnumerable,"find_all", enum_find_all, 0); + rb_define_method(rb_mEnumerable,"select", enum_find_all, 0); + rb_define_method(rb_mEnumerable,"reject", enum_reject, 0); + rb_define_method(rb_mEnumerable,"collect", enum_collect, 0); + rb_define_method(rb_mEnumerable,"map", enum_collect, 0); + rb_define_method(rb_mEnumerable,"inject", enum_inject, -1); + rb_define_method(rb_mEnumerable,"partition", enum_partition, 0); + rb_define_method(rb_mEnumerable,"all?", enum_all, 0); + rb_define_method(rb_mEnumerable,"any?", enum_any, 0); + rb_define_method(rb_mEnumerable,"min", enum_min, 0); + rb_define_method(rb_mEnumerable,"max", enum_max, 0); + rb_define_method(rb_mEnumerable,"min_by", enum_min_by, 0); + rb_define_method(rb_mEnumerable,"max_by", enum_max_by, 0); + rb_define_method(rb_mEnumerable,"member?", enum_member, 1); + rb_define_method(rb_mEnumerable,"include?", enum_member, 1); + rb_define_method(rb_mEnumerable,"each_with_index", enum_each_with_index, 0); + rb_define_method(rb_mEnumerable, "zip", enum_zip, -1); + + id_eqq = rb_intern("==="); + id_each = rb_intern("each"); + id_cmp = rb_intern("<=>"); +} + +/********************************************************************** + + error.c - + + $Author: eban $ + $Date: 2005/03/18 03:17:27 $ + created at: Mon Aug 9 16:11:34 JST 1993 + + Copyright (C) 1993-2003 Yukihiro Matsumoto + +**********************************************************************/ + +#include "ruby.h" +#include "env.h" +#include 
"st.h" + +#include <stdio.h> +#ifdef HAVE_STDARG_PROTOTYPES +#include <stdarg.h> +#define va_init_list(a,b) va_start(a,b) +#else +#include <varargs.h> +#define va_init_list(a,b) va_start(a) +#endif +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif +#ifndef EXIT_SUCCESS +#define EXIT_SUCCESS 0 +#endif + +extern const char ruby_version[], ruby_release_date[], ruby_platform[]; + +int ruby_nerrs; + +static int +err_position(buf, len) + char *buf; + long len; +{ + ruby_set_current_source(); + if (!ruby_sourcefile) { + return 0; + } + else if (ruby_sourceline == 0) { + return snprintf(buf, len, "%s: ", ruby_sourcefile); + } + else { + return snprintf(buf, len, "%s:%d: ", ruby_sourcefile, ruby_sourceline); + } +} + +static void +err_snprintf(buf, len, fmt, args) + char *buf; + long len; + const char *fmt; + va_list args; +{ + long n; + + n = err_position(buf, len); + if (len > n) { + vsnprintf((char*)buf+n, len-n, fmt, args); + } +} + +static void err_append _((const char*)); +static void +err_print(fmt, args) + const char *fmt; + va_list args; +{ + char buf[BUFSIZ]; + + err_snprintf(buf, BUFSIZ, fmt, args); + err_append(buf); +} + +void +#ifdef HAVE_STDARG_PROTOTYPES +rb_compile_error(const char *fmt, ...) +#else +rb_compile_error(fmt, va_alist) + const char *fmt; + va_dcl +#endif +{ + va_list args; + + va_init_list(args, fmt); + err_print(fmt, args); + va_end(args); + ruby_nerrs++; +} + +void +#ifdef HAVE_STDARG_PROTOTYPES +rb_compile_error_append(const char *fmt, ...) 
+#else +rb_compile_error_append(fmt, va_alist) + const char *fmt; + va_dcl +#endif +{ + va_list args; + char buf[BUFSIZ]; + + va_init_list(args, fmt); + vsnprintf(buf, BUFSIZ, fmt, args); + va_end(args); + err_append(buf); +} + +static void +warn_print(fmt, args) + const char *fmt; + va_list args; +{ + char buf[BUFSIZ]; + int len; + + err_snprintf(buf, BUFSIZ, fmt, args); + len = strlen(buf); + buf[len++] = '\n'; + rb_write_error2(buf, len); +} + +void +#ifdef HAVE_STDARG_PROTOTYPES +rb_warn(const char *fmt, ...) +#else +rb_warn(fmt, va_alist) + const char *fmt; + va_dcl +#endif +{ + char buf[BUFSIZ]; + va_list args; + + if (NIL_P(ruby_verbose)) return; + + snprintf(buf, BUFSIZ, "warning: %s", fmt); + + va_init_list(args, fmt); + warn_print(buf, args); + va_end(args); +} + +/* rb_warning() reports only in verbose mode */ +void +#ifdef HAVE_STDARG_PROTOTYPES +rb_warning(const char *fmt, ...) +#else +rb_warning(fmt, va_alist) + const char *fmt; + va_dcl +#endif +{ + char buf[BUFSIZ]; + va_list args; + + if (!RTEST(ruby_verbose)) return; + + snprintf(buf, BUFSIZ, "warning: %s", fmt); + + va_init_list(args, fmt); + warn_print(buf, args); + va_end(args); +} + +/* + * call-seq: + * warn(msg) => nil + * + * Display the given message (followed by a newline) on STDERR unless + * warnings are disabled (for example with the <code>-W0</code> flag). + */ + +static VALUE +rb_warn_m(self, mesg) + VALUE self, mesg; +{ + if (!NIL_P(ruby_verbose)) { + rb_io_write(rb_stderr, mesg); + rb_io_write(rb_stderr, rb_default_rs); + } + return Qnil; +} + +void +#ifdef HAVE_STDARG_PROTOTYPES +rb_bug(const char *fmt, ...) 
+#else +rb_bug(fmt, va_alist) + const char *fmt; + va_dcl +#endif +{ + char buf[BUFSIZ]; + va_list args; + FILE *out = stderr; + int len = err_position(buf, BUFSIZ); + + if (fwrite(buf, 1, len, out) == len || + fwrite(buf, 1, len, (out = stdout)) == len) { + fputs("[BUG] ", out); + va_init_list(args, fmt); + vfprintf(out, fmt, args); + va_end(args); + fprintf(out, "\nruby %s (%s) [%s]\n\n", + ruby_version, ruby_release_date, ruby_platform); + } + abort(); +} + +static struct types { + int type; + const char *name; +} builtin_types[] = { + {T_NIL, "nil"}, + {T_OBJECT, "Object"}, + {T_CLASS, "Class"}, + {T_ICLASS, "iClass"}, /* internal use: mixed-in module holder */ + {T_MODULE, "Module"}, + {T_FLOAT, "Float"}, + {T_STRING, "String"}, + {T_REGEXP, "Regexp"}, + {T_ARRAY, "Array"}, + {T_FIXNUM, "Fixnum"}, + {T_HASH, "Hash"}, + {T_STRUCT, "Struct"}, + {T_BIGNUM, "Bignum"}, + {T_FILE, "File"}, + {T_TRUE, "true"}, + {T_FALSE, "false"}, + {T_SYMBOL, "Symbol"}, /* :symbol */ + {T_DATA, "Data"}, /* internal use: wrapped C pointers */ + {T_MATCH, "MatchData"}, /* data of $~ */ + {T_VARMAP, "Varmap"}, /* internal use: dynamic variables */ + {T_SCOPE, "Scope"}, /* internal use: variable scope */ + {T_NODE, "Node"}, /* internal use: syntax tree node */ + {T_UNDEF, "undef"}, /* internal use: #undef; should not happen */ + {-1, 0} +}; + +void +rb_check_type(x, t) + VALUE x; + int t; +{ + struct types *type = builtin_types; + + if (x == Qundef) { + rb_bug("undef leaked to the Ruby space"); + } + + if (TYPE(x) != t) { + while (type->type >= 0) { + if (type->type == t) { + char *etype; + + if (NIL_P(x)) { + etype = "nil"; + } + else if (FIXNUM_P(x)) { + etype = "Fixnum"; + } + else if (SYMBOL_P(x)) { + etype = "Symbol"; + } + else if (rb_special_const_p(x)) { + etype = RSTRING(rb_obj_as_string(x))->ptr; + } + else { + etype = rb_obj_classname(x); + } + rb_raise(rb_eTypeError, "wrong argument type %s (expected %s)", + etype, type->name); + } + type++; + } + rb_bug("unknown type 0x%x 
(0x%x given)", t, TYPE(x)); + } +} + +/* exception classes */ +#include <errno.h> + +VALUE rb_eException; +VALUE rb_eSystemExit; +VALUE rb_eInterrupt; +VALUE rb_eSignal; +VALUE rb_eFatal; +VALUE rb_eStandardError; +VALUE rb_eRuntimeError; +VALUE rb_eTypeError; +VALUE rb_eArgError; +VALUE rb_eIndexError; +VALUE rb_eKeyError; +VALUE rb_eRangeError; +VALUE rb_eNameError; +VALUE rb_eNoMethodError; +VALUE rb_eSecurityError; +VALUE rb_eNotImpError; +VALUE rb_eNoMemError; +static VALUE rb_cNameErrorMesg; + +VALUE rb_eScriptError; +VALUE rb_eSyntaxError; +VALUE rb_eLoadError; + +VALUE rb_eSystemCallError; +VALUE rb_mErrno; +static VALUE eNOERROR; + +VALUE +rb_exc_new(etype, ptr, len) + VALUE etype; + const char *ptr; + long len; +{ + return rb_funcall(etype, rb_intern("new"), 1, rb_str_new(ptr, len)); +} + +VALUE +rb_exc_new2(etype, s) + VALUE etype; + const char *s; +{ + return rb_exc_new(etype, s, strlen(s)); +} + +VALUE +rb_exc_new3(etype, str) + VALUE etype, str; +{ + StringValue(str); + return rb_funcall(etype, rb_intern("new"), 1, str); +} + +/* + * call-seq: + * Exception.new(msg = nil) => exception + * + * Construct a new Exception object, optionally passing in + * a message. + */ + +static VALUE +exc_initialize(argc, argv, exc) + int argc; + VALUE *argv; + VALUE exc; +{ + VALUE arg; + + rb_scan_args(argc, argv, "01", &arg); + rb_iv_set(exc, "mesg", arg); + rb_iv_set(exc, "bt", Qnil); + + return exc; +} + +/* + * Document-method: exception + * + * call-seq: + * exc.exception(string) -> an_exception or exc + * + * With no argument, or if the argument is the same as the receiver, + * return the receiver. Otherwise, create a new + * exception object of the same class as the receiver, but with a + * message equal to <code>string.to_str</code>. 
+ * + */ + +static VALUE +exc_exception(argc, argv, self) + int argc; + VALUE *argv; + VALUE self; +{ + VALUE exc; + + if (argc == 0) return self; + if (argc == 1 && self == argv[0]) return self; + exc = rb_obj_clone(self); + exc_initialize(argc, argv, exc); + + return exc; +} + +/* + * call-seq: + * exception.to_s => string + * + * Returns exception's message (or the name of the exception if + * no message is set). + */ + +static VALUE +exc_to_s(exc) + VALUE exc; +{ + VALUE mesg = rb_attr_get(exc, rb_intern("mesg")); + + if (NIL_P(mesg)) return rb_class_name(CLASS_OF(exc)); + if (OBJ_TAINTED(exc)) OBJ_TAINT(mesg); + return mesg; +} + +/* + * call-seq: + * exception.message => string + * + * Returns the result of invoking <code>exception.to_s</code>. + * Normally this returns the exception's message or name. By + * supplying a to_str method, exceptions are agreeing to + * be used where Strings are expected. + */ + +static VALUE +exc_message(exc) + VALUE exc; +{ + return rb_funcall(exc, rb_intern("to_s"), 0, 0); +} + +/* + * call-seq: + * exception.inspect => string + * + * Return this exception's class name an message + */ + +static VALUE +exc_inspect(exc) + VALUE exc; +{ + VALUE str, klass; + + klass = CLASS_OF(exc); + exc = rb_obj_as_string(exc); + if (RSTRING(exc)->len == 0) { + return rb_str_dup(rb_class_name(klass)); + } + + str = rb_str_buf_new2("#<"); + klass = rb_class_name(klass); + rb_str_buf_append(str, klass); + rb_str_buf_cat(str, ": ", 2); + rb_str_buf_append(str, exc); + rb_str_buf_cat(str, ">", 1); + + return str; +} + +/* + * call-seq: + * exception.backtrace => array + * + * Returns any backtrace associated with the exception. 
The backtrace + * is an array of strings, each containing either ``filename:lineNo: in + * `method''' or ``filename:lineNo.'' + * + * def a + * raise "boom" + * end + * + * def b + * a() + * end + * + * begin + * b() + * rescue => detail + * print detail.backtrace.join("\n") + * end + * + * <em>produces:</em> + * + * prog.rb:2:in `a' + * prog.rb:6:in `b' + * prog.rb:10 +*/ + +static VALUE +exc_backtrace(exc) + VALUE exc; +{ + ID bt = rb_intern("bt"); + + if (!rb_ivar_defined(exc, bt)) return Qnil; + return rb_ivar_get(exc, bt); +} + +static VALUE +check_backtrace(bt) + VALUE bt; +{ + long i; + static char *err = "backtrace must be Array of String"; + + if (!NIL_P(bt)) { + int t = TYPE(bt); + + if (t == T_STRING) return rb_ary_new3(1, bt); + if (t != T_ARRAY) { + rb_raise(rb_eTypeError, err); + } + for (i=0;i<RARRAY(bt)->len;i++) { + if (TYPE(RARRAY(bt)->ptr[i]) != T_STRING) { + rb_raise(rb_eTypeError, err); + } + } + } + return bt; +} + +/* + * call-seq: + * exc.set_backtrace(array) => array + * + * Sets the backtrace information associated with <i>exc</i>. The + * argument must be an array of <code>String</code> objects in the + * format described in <code>Exception#backtrace</code>. + * + */ + +static VALUE +exc_set_backtrace(exc, bt) + VALUE exc; + VALUE bt; +{ + return rb_iv_set(exc, "bt", check_backtrace(bt)); +} + +/* + * call-seq: + * exc == obj => true or false + * + * Equality---If <i>obj</i> is not an <code>Exception</code>, returns + * <code>false</code>. Otherwise, returns <code>true</code> if <i>exc</i> and + * <i>obj</i> share same class, messages, and backtrace. 
+ */ + +static VALUE +exc_equal(exc, obj) + VALUE exc; + VALUE obj; +{ + ID id_mesg = rb_intern("mesg"); + + if (exc == obj) return Qtrue; + if (rb_obj_class(exc) != rb_obj_class(obj)) + return Qfalse; + if (!rb_equal(rb_attr_get(exc, id_mesg), rb_attr_get(obj, id_mesg))) + return Qfalse; + if (!rb_equal(exc_backtrace(exc), exc_backtrace(obj))) + return Qfalse; + return Qtrue; +} + +/* + * call-seq: + * SystemExit.new(status=0) => system_exit + * + * Create a new +SystemExit+ exception with the given status. + */ + +static VALUE +exit_initialize(argc, argv, exc) + int argc; + VALUE *argv; + VALUE exc; +{ + VALUE status = INT2FIX(EXIT_SUCCESS); + if (argc > 0 && FIXNUM_P(argv[0])) { + status = *argv++; + --argc; + } + exc_initialize(argc, argv, exc); + rb_iv_set(exc, "status", status); + return exc; +} + + +/* + * call-seq: + * system_exit.status => fixnum + * + * Return the status value associated with this system exit. + */ + +static VALUE +exit_status(exc) + VALUE exc; +{ + return rb_attr_get(exc, rb_intern("status")); +} + + +/* + * call-seq: + * system_exit.success? => true or false + * + * Returns +true+ if exiting successful, +false+ if not. + */ + +static VALUE +exit_success_p(exc) + VALUE exc; +{ + VALUE status = rb_attr_get(exc, rb_intern("status")); + if (NIL_P(status)) return Qtrue; + if (status == INT2FIX(EXIT_SUCCESS)) return Qtrue; + return Qfalse; +} + +void +#ifdef HAVE_STDARG_PROTOTYPES +rb_name_error(ID id, const char *fmt, ...) +#else +rb_name_error(id, fmt, va_alist) + ID id; + const char *fmt; + va_dcl +#endif +{ + VALUE exc, argv[2]; + va_list args; + char buf[BUFSIZ]; + + va_init_list(args, fmt); + vsnprintf(buf, BUFSIZ, fmt, args); + va_end(args); + + argv[0] = rb_str_new2(buf); + argv[1] = ID2SYM(id); + exc = rb_class_new_instance(2, argv, rb_eNameError); + rb_exc_raise(exc); +} + +/* + * call-seq: + * NameError.new(msg [, name]) => name_error + * + * Construct a new NameError exception. 
If given the <i>name</i> + * parameter may subsequently be examined using the <code>NameError.name</code> + * method. + */ + +static VALUE +name_err_initialize(argc, argv, self) + int argc; + VALUE *argv; + VALUE self; +{ + VALUE name; + + name = (argc > 1) ? argv[--argc] : Qnil; + exc_initialize(argc, argv, self); + rb_iv_set(self, "name", name); + return self; +} + +/* + * call-seq: + * name_error.name => string or nil + * + * Return the name associated with this NameError exception. + */ + +static VALUE +name_err_name(self) + VALUE self; +{ + return rb_attr_get(self, rb_intern("name")); +} + +/* + * call-seq: + * name_error.to_s => string + * + * Produce a nicely-formated string representing the +NameError+. + */ + +static VALUE +name_err_to_s(exc) + VALUE exc; +{ + VALUE mesg = rb_attr_get(exc, rb_intern("mesg")); + VALUE str = mesg; + + if (NIL_P(mesg)) return rb_class_name(CLASS_OF(exc)); + StringValue(str); + if (str != mesg) { + rb_iv_set(exc, "mesg", mesg = str); + } + if (OBJ_TAINTED(exc)) OBJ_TAINT(mesg); + return mesg; +} + +/* + * call-seq: + * NoMethodError.new(msg, name [, args]) => no_method_error + * + * Construct a NoMethodError exception for a method of the given name + * called with the given arguments. The name may be accessed using + * the <code>#name</code> method on the resulting object, and the + * arguments using the <code>#args</code> method. + */ + +static VALUE +nometh_err_initialize(argc, argv, self) + int argc; + VALUE *argv; + VALUE self; +{ + VALUE args = (argc > 2) ? 
argv[--argc] : Qnil; + name_err_initialize(argc, argv, self); + rb_iv_set(self, "args", args); + return self; +} + +/* :nodoc: */ +static void +name_err_mesg_mark(ptr) + VALUE *ptr; +{ + rb_gc_mark_locations(ptr, ptr+3); +} + +/* :nodoc: */ +static VALUE +name_err_mesg_new(obj, mesg, recv, method) + VALUE obj, mesg, recv, method; +{ + VALUE *ptr = ALLOC_N(VALUE, 3); + + ptr[0] = mesg; + ptr[1] = recv; + ptr[2] = method; + return Data_Wrap_Struct(rb_cNameErrorMesg, name_err_mesg_mark, -1, ptr); +} + +/* :nodoc: */ +static VALUE +name_err_mesg_equal(obj1, obj2) + VALUE obj1, obj2; +{ + VALUE *ptr1, *ptr2; + int i; + + if (obj1 == obj2) return Qtrue; + if (rb_obj_class(obj2) != rb_cNameErrorMesg) + return Qfalse; + + Data_Get_Struct(obj1, VALUE, ptr1); + Data_Get_Struct(obj2, VALUE, ptr2); + for (i=0; i<3; i++) { + if (!rb_equal(ptr1[i], ptr2[i])) + return Qfalse; + } + return Qtrue; +} + +/* :nodoc: */ +static VALUE +name_err_mesg_to_str(obj) + VALUE obj; +{ + VALUE *ptr, mesg; + Data_Get_Struct(obj, VALUE, ptr); + + mesg = ptr[0]; + if (NIL_P(mesg)) return Qnil; + else { + char *desc = 0; + VALUE d = 0, args[3]; + + obj = ptr[1]; + switch (TYPE(obj)) { + case T_NIL: + desc = "nil"; + break; + case T_TRUE: + desc = "true"; + break; + case T_FALSE: + desc = "false"; + break; + default: + d = rb_protect(rb_inspect, obj, 0); + if (NIL_P(d) || RSTRING(d)->len > 65) { + d = rb_any_to_s(obj); + } + desc = RSTRING(d)->ptr; + break; + } + if (desc && desc[0] != '#') { + d = rb_str_new2(desc); + rb_str_cat2(d, ":"); + rb_str_cat2(d, rb_obj_classname(obj)); + } + args[0] = mesg; + args[1] = ptr[2]; + args[2] = d; + mesg = rb_f_sprintf(3, args); + } + if (OBJ_TAINTED(obj)) OBJ_TAINT(mesg); + return mesg; +} + +/* :nodoc: */ +static VALUE +name_err_mesg_load(klass, str) + VALUE klass, str; +{ + return str; +} + +/* + * call-seq: + * no_method_error.args => obj + * + * Return the arguments passed in as the third parameter to + * the constructor. 
+ */ + +static VALUE +nometh_err_args(self) + VALUE self; +{ + return rb_attr_get(self, rb_intern("args")); +} + +void +rb_invalid_str(str, type) + const char *str, *type; +{ + VALUE s = rb_str_inspect(rb_str_new2(str)); + + rb_raise(rb_eArgError, "invalid value for %s: %s", type, RSTRING(s)->ptr); +} + +/* + * Document-module: Errno + * + * Ruby exception objects are subclasses of <code>Exception</code>. + * However, operating systems typically report errors using plain + * integers. Module <code>Errno</code> is created dynamically to map + * these operating system errors to Ruby classes, with each error + * number generating its own subclass of <code>SystemCallError</code>. + * As the subclass is created in module <code>Errno</code>, its name + * will start <code>Errno::</code>. + * + * The names of the <code>Errno::</code> classes depend on + * the environment in which Ruby runs. On a typical Unix or Windows + * platform, there are <code>Errno</code> classes such as + * <code>Errno::EACCES</code>, <code>Errno::EAGAIN</code>, + * <code>Errno::EINTR</code>, and so on. + * + * The integer operating system error number corresponding to a + * particular error is available as the class constant + * <code>Errno::</code><em>error</em><code>::Errno</code>. + * + * Errno::EACCES::Errno #=> 13 + * Errno::EAGAIN::Errno #=> 11 + * Errno::EINTR::Errno #=> 4 + * + * The full list of operating system errors on your particular platform + * are available as the constants of <code>Errno</code>. + * + * Errno.constants #=> E2BIG, EACCES, EADDRINUSE, EADDRNOTAVAIL, ... 
+ */ + +static st_table *syserr_tbl; + +static VALUE +set_syserr(n, name) + int n; + const char *name; +{ + VALUE error; + + if (!st_lookup(syserr_tbl, n, &error)) { + error = rb_define_class_under(rb_mErrno, name, rb_eSystemCallError); + rb_define_const(error, "Errno", INT2NUM(n)); + st_add_direct(syserr_tbl, n, error); + } + else { + rb_define_const(rb_mErrno, name, error); + } + return error; +} + +static VALUE +get_syserr(n) + int n; +{ + VALUE error; + + if (!st_lookup(syserr_tbl, n, &error)) { + char name[8]; /* some Windows' errno have 5 digits. */ + + snprintf(name, sizeof(name), "E%03d", n); + error = set_syserr(n, name); + } + return error; +} + +/* + * call-seq: + * SystemCallError.new(msg, errno) => system_call_error_subclass + * + * If _errno_ corresponds to a known system error code, constructs + * the appropriate <code>Errno</code> class for that error, otherwise + * constructs a generic <code>SystemCallError</code> object. The + * error number is subsequently available via the <code>errno</code> + * method. 
+ */ + +static VALUE +syserr_initialize(argc, argv, self) + int argc; + VALUE *argv; + VALUE self; +{ +#if !defined(_WIN32) && !defined(__VMS) + char *strerror(); +#endif + char *err; + VALUE mesg, error; + VALUE klass = rb_obj_class(self); + + if (klass == rb_eSystemCallError) { + rb_scan_args(argc, argv, "11", &mesg, &error); + if (argc == 1 && FIXNUM_P(mesg)) { + error = mesg; mesg = Qnil; + } + if (!NIL_P(error) && st_lookup(syserr_tbl, NUM2LONG(error), &klass)) { + /* change class */ + if (TYPE(self) != T_OBJECT) { /* insurance to avoid type crash */ + rb_raise(rb_eTypeError, "invalid instance type"); + } + RBASIC(self)->klass = klass; + } + } + else { + rb_scan_args(argc, argv, "01", &mesg); + error = rb_const_get(klass, rb_intern("Errno")); + } + if (!NIL_P(error)) err = strerror(NUM2LONG(error)); + else err = "unknown error"; + if (!NIL_P(mesg)) { + VALUE str = mesg; + StringValue(str); + mesg = rb_str_new(0, strlen(err)+RSTRING(str)->len+3); + sprintf(RSTRING(mesg)->ptr, "%s - %.*s", err, + (int)RSTRING(str)->len, RSTRING(str)->ptr); + rb_str_resize(mesg, strlen(RSTRING(mesg)->ptr)); + } + else { + mesg = rb_str_new2(err); + } + exc_initialize(1, &mesg, self); + rb_iv_set(self, "errno", error); + return self; +} + +/* + * call-seq: + * system_call_error.errno => fixnum + * + * Return this SystemCallError's error number. + */ + +static VALUE +syserr_errno(self) + VALUE self; +{ + return rb_attr_get(self, rb_intern("errno")); +} + +/* + * call-seq: + * system_call_error === other => true or false + * + * Return +true+ if the receiver is a generic +SystemCallError+, or + * if the error numbers _self_ and _other_ are the same. 
+ */ + +static VALUE +syserr_eqq(self, exc) + VALUE self, exc; +{ + VALUE num, e; + + if (!rb_obj_is_kind_of(exc, rb_eSystemCallError)) return Qfalse; + if (self == rb_eSystemCallError) return Qtrue; + + num = rb_attr_get(exc, rb_intern("errno")); + if (NIL_P(num)) { + VALUE klass = CLASS_OF(exc); + + while (TYPE(klass) == T_ICLASS || FL_TEST(klass, FL_SINGLETON)) { + klass = (VALUE)RCLASS(klass)->super; + } + num = rb_const_get(klass, rb_intern("Errno")); + } + e = rb_const_get(self, rb_intern("Errno")); + if (FIXNUM_P(num) ? num == e : rb_equal(num, e)) + return Qtrue; + return Qfalse; +} + +/* + * call-seq: + * Errno.const_missing => SystemCallError + * + * Returns default SystemCallError class. + */ +static VALUE +errno_missing(self, id) + VALUE self, id; +{ + return eNOERROR; +} + +/* + * Descendents of class <code>Exception</code> are used to communicate + * between <code>raise</code> methods and <code>rescue</code> + * statements in <code>begin/end</code> blocks. <code>Exception</code> + * objects carry information about the exception---its type (the + * exception's class name), an optional descriptive string, and + * optional traceback information. Programs may subclass + * <code>Exception</code> to add additional information. 
+ */ + +void +Init_Exception() +{ + rb_eException = rb_define_class("Exception", rb_cObject); + rb_define_singleton_method(rb_eException, "exception", rb_class_new_instance, -1); + rb_define_method(rb_eException, "exception", exc_exception, -1); + rb_define_method(rb_eException, "initialize", exc_initialize, -1); + rb_define_method(rb_eException, "==", exc_equal, 1); + rb_define_method(rb_eException, "to_s", exc_to_s, 0); + rb_define_method(rb_eException, "message", exc_message, 0); + rb_define_method(rb_eException, "inspect", exc_inspect, 0); + rb_define_method(rb_eException, "backtrace", exc_backtrace, 0); + rb_define_method(rb_eException, "set_backtrace", exc_set_backtrace, 1); + + rb_eSystemExit = rb_define_class("SystemExit", rb_eException); + rb_define_method(rb_eSystemExit, "initialize", exit_initialize, -1); + rb_define_method(rb_eSystemExit, "status", exit_status, 0); + rb_define_method(rb_eSystemExit, "success?", exit_success_p, 0); + + rb_eFatal = rb_define_class("fatal", rb_eException); + rb_eSignal = rb_define_class("SignalException", rb_eException); + rb_eInterrupt = rb_define_class("Interrupt", rb_eSignal); + + rb_eStandardError = rb_define_class("StandardError", rb_eException); + rb_eTypeError = rb_define_class("TypeError", rb_eStandardError); + rb_eArgError = rb_define_class("ArgumentError", rb_eStandardError); + rb_eIndexError = rb_define_class("IndexError", rb_eStandardError); + rb_eKeyError = rb_define_class("KeyError", rb_eIndexError); + rb_eRangeError = rb_define_class("RangeError", rb_eStandardError); + rb_eNameError = rb_define_class("NameError", rb_eStandardError); + rb_define_method(rb_eNameError, "initialize", name_err_initialize, -1); + rb_define_method(rb_eNameError, "name", name_err_name, 0); + rb_define_method(rb_eNameError, "to_s", name_err_to_s, 0); + rb_cNameErrorMesg = rb_define_class_under(rb_eNameError, "message", rb_cData); + rb_define_singleton_method(rb_cNameErrorMesg, "!", name_err_mesg_new, 3); + 
rb_define_method(rb_cNameErrorMesg, "==", name_err_mesg_equal, 1); + rb_define_method(rb_cNameErrorMesg, "to_str", name_err_mesg_to_str, 0); + rb_define_method(rb_cNameErrorMesg, "_dump", name_err_mesg_to_str, 1); + rb_define_singleton_method(rb_cNameErrorMesg, "_load", name_err_mesg_load, 1); + rb_eNoMethodError = rb_define_class("NoMethodError", rb_eNameError); + rb_define_method(rb_eNoMethodError, "initialize", nometh_err_initialize, -1); + rb_define_method(rb_eNoMethodError, "args", nometh_err_args, 0); + + rb_eScriptError = rb_define_class("ScriptError", rb_eException); + rb_eSyntaxError = rb_define_class("SyntaxError", rb_eScriptError); + rb_eLoadError = rb_define_class("LoadError", rb_eScriptError); + rb_eNotImpError = rb_define_class("NotImplementedError", rb_eScriptError); + + rb_eRuntimeError = rb_define_class("RuntimeError", rb_eStandardError); + rb_eSecurityError = rb_define_class("SecurityError", rb_eStandardError); + rb_eNoMemError = rb_define_class("NoMemoryError", rb_eException); + + syserr_tbl = st_init_numtable(); + rb_eSystemCallError = rb_define_class("SystemCallError", rb_eStandardError); + rb_define_method(rb_eSystemCallError, "initialize", syserr_initialize, -1); + rb_define_method(rb_eSystemCallError, "errno", syserr_errno, 0); + rb_define_singleton_method(rb_eSystemCallError, "===", syserr_eqq, 1); + + rb_mErrno = rb_define_module("Errno"); + rb_define_singleton_method(rb_mErrno, "const_missing", errno_missing, 1); + + rb_define_global_function("warn", rb_warn_m, 1); +} + +void +#ifdef HAVE_STDARG_PROTOTYPES +rb_raise(VALUE exc, const char *fmt, ...) +#else +rb_raise(exc, fmt, va_alist) + VALUE exc; + const char *fmt; + va_dcl +#endif +{ + va_list args; + char buf[BUFSIZ]; + + va_init_list(args,fmt); + vsnprintf(buf, BUFSIZ, fmt, args); + va_end(args); + rb_exc_raise(rb_exc_new2(exc, buf)); +} + +void +#ifdef HAVE_STDARG_PROTOTYPES +rb_loaderror(const char *fmt, ...) 
+#else +rb_loaderror(fmt, va_alist) + const char *fmt; + va_dcl +#endif +{ + va_list args; + char buf[BUFSIZ]; + + va_init_list(args, fmt); + vsnprintf(buf, BUFSIZ, fmt, args); + va_end(args); + rb_exc_raise(rb_exc_new2(rb_eLoadError, buf)); +} + +void +rb_notimplement() +{ + rb_raise(rb_eNotImpError, + "The %s() function is unimplemented on this machine", + rb_id2name(ruby_frame->callee)); +} + +void +#ifdef HAVE_STDARG_PROTOTYPES +rb_fatal(const char *fmt, ...) +#else +rb_fatal(fmt, va_alist) + const char *fmt; + va_dcl +#endif +{ + va_list args; + char buf[BUFSIZ]; + + va_init_list(args, fmt); + vsnprintf(buf, BUFSIZ, fmt, args); + va_end(args); + + ruby_in_eval = 0; + rb_exc_fatal(rb_exc_new2(rb_eFatal, buf)); +} + +void +rb_sys_fail(mesg) + const char *mesg; +{ + extern int errno; + int n = errno; + VALUE arg; + + errno = 0; + if (n == 0) { + rb_bug("rb_sys_fail(%s) - errno == 0", mesg ? mesg : ""); + } + + arg = mesg ? rb_str_new2(mesg) : Qnil; + rb_exc_raise(rb_class_new_instance(1, &arg, get_syserr(n))); +} + +void +#ifdef HAVE_STDARG_PROTOTYPES +rb_sys_warning(const char *fmt, ...) 
+#else +rb_sys_warning(fmt, va_alist) + const char *fmt; + va_dcl +#endif +{ + char buf[BUFSIZ]; + va_list args; + int errno_save; + + errno_save = errno; + + if (!RTEST(ruby_verbose)) return; + + snprintf(buf, BUFSIZ, "warning: %s", fmt); + snprintf(buf+strlen(buf), BUFSIZ-strlen(buf), ": %s", strerror(errno_save)); + + va_init_list(args, fmt); + warn_print(buf, args); + va_end(args); + errno = errno_save; +} + +void +rb_load_fail(path) + const char *path; +{ + rb_loaderror("%s -- %s", strerror(errno), path); +} + +void +rb_error_frozen(what) + const char *what; +{ + rb_raise(rb_eRuntimeError, "can't modify frozen %s", what); +} + +void +rb_check_frozen(obj) + VALUE obj; +{ + if (OBJ_FROZEN(obj)) rb_error_frozen(rb_obj_classname(obj)); +} + +void +Init_syserr() +{ +#ifdef EPERM + set_syserr(EPERM, "EPERM"); +#endif +#ifdef ENOENT + set_syserr(ENOENT, "ENOENT"); +#endif +#ifdef ESRCH + set_syserr(ESRCH, "ESRCH"); +#endif +#ifdef EINTR + set_syserr(EINTR, "EINTR"); +#endif +#ifdef EIO + set_syserr(EIO, "EIO"); +#endif +#ifdef ENXIO + set_syserr(ENXIO, "ENXIO"); +#endif +#ifdef E2BIG + set_syserr(E2BIG, "E2BIG"); +#endif +#ifdef ENOEXEC + set_syserr(ENOEXEC, "ENOEXEC"); +#endif +#ifdef EBADF + set_syserr(EBADF, "EBADF"); +#endif +#ifdef ECHILD + set_syserr(ECHILD, "ECHILD"); +#endif +#ifdef EAGAIN + set_syserr(EAGAIN, "EAGAIN"); +#endif +#ifdef ENOMEM + set_syserr(ENOMEM, "ENOMEM"); +#endif +#ifdef EACCES + set_syserr(EACCES, "EACCES"); +#endif +#ifdef EFAULT + set_syserr(EFAULT, "EFAULT"); +#endif +#ifdef ENOTBLK + set_syserr(ENOTBLK, "ENOTBLK"); +#endif +#ifdef EBUSY + set_syserr(EBUSY, "EBUSY"); +#endif +#ifdef EEXIST + set_syserr(EEXIST, "EEXIST"); +#endif +#ifdef EXDEV + set_syserr(EXDEV, "EXDEV"); +#endif +#ifdef ENODEV + set_syserr(ENODEV, "ENODEV"); +#endif +#ifdef ENOTDIR + set_syserr(ENOTDIR, "ENOTDIR"); +#endif +#ifdef EISDIR + set_syserr(EISDIR, "EISDIR"); +#endif +#ifdef EINVAL + set_syserr(EINVAL, "EINVAL"); +#endif +#ifdef ENFILE + set_syserr(ENFILE, 
"ENFILE"); +#endif +#ifdef EMFILE + set_syserr(EMFILE, "EMFILE"); +#endif +#ifdef ENOTTY + set_syserr(ENOTTY, "ENOTTY"); +#endif +#ifdef ETXTBSY + set_syserr(ETXTBSY, "ETXTBSY"); +#endif +#ifdef EFBIG + set_syserr(EFBIG, "EFBIG"); +#endif +#ifdef ENOSPC + set_syserr(ENOSPC, "ENOSPC"); +#endif +#ifdef ESPIPE + set_syserr(ESPIPE, "ESPIPE"); +#endif +#ifdef EROFS + set_syserr(EROFS, "EROFS"); +#endif +#ifdef EMLINK + set_syserr(EMLINK, "EMLINK"); +#endif +#ifdef EPIPE + set_syserr(EPIPE, "EPIPE"); +#endif +#ifdef EDOM + set_syserr(EDOM, "EDOM"); +#endif +#ifdef ERANGE + set_syserr(ERANGE, "ERANGE"); +#endif +#ifdef EDEADLK + set_syserr(EDEADLK, "EDEADLK"); +#endif +#ifdef ENAMETOOLONG + set_syserr(ENAMETOOLONG, "ENAMETOOLONG"); +#endif +#ifdef ENOLCK + set_syserr(ENOLCK, "ENOLCK"); +#endif +#ifdef ENOSYS + set_syserr(ENOSYS, "ENOSYS"); +#endif +#ifdef ENOTEMPTY + set_syserr(ENOTEMPTY, "ENOTEMPTY"); +#endif +#ifdef ELOOP + set_syserr(ELOOP, "ELOOP"); +#endif +#ifdef EWOULDBLOCK + set_syserr(EWOULDBLOCK, "EWOULDBLOCK"); +#endif +#ifdef ENOMSG + set_syserr(ENOMSG, "ENOMSG"); +#endif +#ifdef EIDRM + set_syserr(EIDRM, "EIDRM"); +#endif +#ifdef ECHRNG + set_syserr(ECHRNG, "ECHRNG"); +#endif +#ifdef EL2NSYNC + set_syserr(EL2NSYNC, "EL2NSYNC"); +#endif +#ifdef EL3HLT + set_syserr(EL3HLT, "EL3HLT"); +#endif +#ifdef EL3RST + set_syserr(EL3RST, "EL3RST"); +#endif +#ifdef ELNRNG + set_syserr(ELNRNG, "ELNRNG"); +#endif +#ifdef EUNATCH + set_syserr(EUNATCH, "EUNATCH"); +#endif +#ifdef ENOCSI + set_syserr(ENOCSI, "ENOCSI"); +#endif +#ifdef EL2HLT + set_syserr(EL2HLT, "EL2HLT"); +#endif +#ifdef EBADE + set_syserr(EBADE, "EBADE"); +#endif +#ifdef EBADR + set_syserr(EBADR, "EBADR"); +#endif +#ifdef EXFULL + set_syserr(EXFULL, "EXFULL"); +#endif +#ifdef ENOANO + set_syserr(ENOANO, "ENOANO"); +#endif +#ifdef EBADRQC + set_syserr(EBADRQC, "EBADRQC"); +#endif +#ifdef EBADSLT + set_syserr(EBADSLT, "EBADSLT"); +#endif +#ifdef EDEADLOCK + set_syserr(EDEADLOCK, "EDEADLOCK"); +#endif +#ifdef 
EBFONT + set_syserr(EBFONT, "EBFONT"); +#endif +#ifdef ENOSTR + set_syserr(ENOSTR, "ENOSTR"); +#endif +#ifdef ENODATA + set_syserr(ENODATA, "ENODATA"); +#endif +#ifdef ETIME + set_syserr(ETIME, "ETIME"); +#endif +#ifdef ENOSR + set_syserr(ENOSR, "ENOSR"); +#endif +#ifdef ENONET + set_syserr(ENONET, "ENONET"); +#endif +#ifdef ENOPKG + set_syserr(ENOPKG, "ENOPKG"); +#endif +#ifdef EREMOTE + set_syserr(EREMOTE, "EREMOTE"); +#endif +#ifdef ENOLINK + set_syserr(ENOLINK, "ENOLINK"); +#endif +#ifdef EADV + set_syserr(EADV, "EADV"); +#endif +#ifdef ESRMNT + set_syserr(ESRMNT, "ESRMNT"); +#endif +#ifdef ECOMM + set_syserr(ECOMM, "ECOMM"); +#endif +#ifdef EPROTO + set_syserr(EPROTO, "EPROTO"); +#endif +#ifdef EMULTIHOP + set_syserr(EMULTIHOP, "EMULTIHOP"); +#endif +#ifdef EDOTDOT + set_syserr(EDOTDOT, "EDOTDOT"); +#endif +#ifdef EBADMSG + set_syserr(EBADMSG, "EBADMSG"); +#endif +#ifdef EOVERFLOW + set_syserr(EOVERFLOW, "EOVERFLOW"); +#endif +#ifdef ENOTUNIQ + set_syserr(ENOTUNIQ, "ENOTUNIQ"); +#endif +#ifdef EBADFD + set_syserr(EBADFD, "EBADFD"); +#endif +#ifdef EREMCHG + set_syserr(EREMCHG, "EREMCHG"); +#endif +#ifdef ELIBACC + set_syserr(ELIBACC, "ELIBACC"); +#endif +#ifdef ELIBBAD + set_syserr(ELIBBAD, "ELIBBAD"); +#endif +#ifdef ELIBSCN + set_syserr(ELIBSCN, "ELIBSCN"); +#endif +#ifdef ELIBMAX + set_syserr(ELIBMAX, "ELIBMAX"); +#endif +#ifdef ELIBEXEC + set_syserr(ELIBEXEC, "ELIBEXEC"); +#endif +#ifdef EILSEQ + set_syserr(EILSEQ, "EILSEQ"); +#endif +#ifdef ERESTART + set_syserr(ERESTART, "ERESTART"); +#endif +#ifdef ESTRPIPE + set_syserr(ESTRPIPE, "ESTRPIPE"); +#endif +#ifdef EUSERS + set_syserr(EUSERS, "EUSERS"); +#endif +#ifdef ENOTSOCK + set_syserr(ENOTSOCK, "ENOTSOCK"); +#endif +#ifdef EDESTADDRREQ + set_syserr(EDESTADDRREQ, "EDESTADDRREQ"); +#endif +#ifdef EMSGSIZE + set_syserr(EMSGSIZE, "EMSGSIZE"); +#endif +#ifdef EPROTOTYPE + set_syserr(EPROTOTYPE, "EPROTOTYPE"); +#endif +#ifdef ENOPROTOOPT + set_syserr(ENOPROTOOPT, "ENOPROTOOPT"); +#endif +#ifdef EPROTONOSUPPORT 
+ set_syserr(EPROTONOSUPPORT, "EPROTONOSUPPORT"); +#endif +#ifdef ESOCKTNOSUPPORT + set_syserr(ESOCKTNOSUPPORT, "ESOCKTNOSUPPORT"); +#endif +#ifdef EOPNOTSUPP + set_syserr(EOPNOTSUPP, "EOPNOTSUPP"); +#endif +#ifdef EPFNOSUPPORT + set_syserr(EPFNOSUPPORT, "EPFNOSUPPORT"); +#endif +#ifdef EAFNOSUPPORT + set_syserr(EAFNOSUPPORT, "EAFNOSUPPORT"); +#endif +#ifdef EADDRINUSE + set_syserr(EADDRINUSE, "EADDRINUSE"); +#endif +#ifdef EADDRNOTAVAIL + set_syserr(EADDRNOTAVAIL, "EADDRNOTAVAIL"); +#endif +#ifdef ENETDOWN + set_syserr(ENETDOWN, "ENETDOWN"); +#endif +#ifdef ENETUNREACH + set_syserr(ENETUNREACH, "ENETUNREACH"); +#endif +#ifdef ENETRESET + set_syserr(ENETRESET, "ENETRESET"); +#endif +#ifdef ECONNABORTED + set_syserr(ECONNABORTED, "ECONNABORTED"); +#endif +#ifdef ECONNRESET + set_syserr(ECONNRESET, "ECONNRESET"); +#endif +#ifdef ENOBUFS + set_syserr(ENOBUFS, "ENOBUFS"); +#endif +#ifdef EISCONN + set_syserr(EISCONN, "EISCONN"); +#endif +#ifdef ENOTCONN + set_syserr(ENOTCONN, "ENOTCONN"); +#endif +#ifdef ESHUTDOWN + set_syserr(ESHUTDOWN, "ESHUTDOWN"); +#endif +#ifdef ETOOMANYREFS + set_syserr(ETOOMANYREFS, "ETOOMANYREFS"); +#endif +#ifdef ETIMEDOUT + set_syserr(ETIMEDOUT, "ETIMEDOUT"); +#endif +#ifdef ECONNREFUSED + set_syserr(ECONNREFUSED, "ECONNREFUSED"); +#endif +#ifdef EHOSTDOWN + set_syserr(EHOSTDOWN, "EHOSTDOWN"); +#endif +#ifdef EHOSTUNREACH + set_syserr(EHOSTUNREACH, "EHOSTUNREACH"); +#endif +#ifdef EALREADY + set_syserr(EALREADY, "EALREADY"); +#endif +#ifdef EINPROGRESS + set_syserr(EINPROGRESS, "EINPROGRESS"); +#endif +#ifdef ESTALE + set_syserr(ESTALE, "ESTALE"); +#endif +#ifdef EUCLEAN + set_syserr(EUCLEAN, "EUCLEAN"); +#endif +#ifdef ENOTNAM + set_syserr(ENOTNAM, "ENOTNAM"); +#endif +#ifdef ENAVAIL + set_syserr(ENAVAIL, "ENAVAIL"); +#endif +#ifdef EISNAM + set_syserr(EISNAM, "EISNAM"); +#endif +#ifdef EREMOTEIO + set_syserr(EREMOTEIO, "EREMOTEIO"); +#endif +#ifdef EDQUOT + set_syserr(EDQUOT, "EDQUOT"); +#endif + eNOERROR = set_syserr(0, "NOERROR"); +} + 
+static void +err_append(s) + const char *s; +{ + extern VALUE ruby_errinfo; + + if (ruby_in_eval) { + if (NIL_P(ruby_errinfo)) { + ruby_errinfo = rb_exc_new2(rb_eSyntaxError, s); + } + else { + VALUE str = rb_obj_as_string(ruby_errinfo); + + rb_str_cat2(str, "\n"); + rb_str_cat2(str, s); + ruby_errinfo = rb_exc_new3(rb_eSyntaxError, str); + } + } + else { + rb_write_error(s); + rb_write_error("\n"); + } +} +/********************************************************************** + euc_jp.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +#define eucjp_islead(c) ((UChar )((c) - 0xa1) > 0xfe - 0xa1) + +static int EncLen_EUCJP[] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 +}; + +static int +eucjp_mbc_enc_len(const UChar* p) +{ + return EncLen_EUCJP[*p]; +} + +static OnigCodePoint +eucjp_mbc_to_code(const UChar* p, const UChar* end) +{ + int c, i, len; + OnigCodePoint n; + + len = enc_len(ONIG_ENCODING_EUC_JP, p); + n = (OnigCodePoint )*p++; + if (len == 1) return n; + + for (i = 1; i < len; i++) { + if (p >= end) break; + c = *p++; + n <<= 8; n += c; + } + return n; +} + +static int +eucjp_code_to_mbclen(OnigCodePoint code) +{ + if (ONIGENC_IS_CODE_ASCII(code)) return 1; + else if ((code & 0xff0000) != 
0) return 3; + else if ((code & 0xff00) != 0) return 2; + else return 0; +} + +#if 0 +static int +eucjp_code_to_mbc_first(OnigCodePoint code) +{ + int first; + + if ((code & 0xff0000) != 0) { + first = (code >> 16) & 0xff; + } + else if ((code & 0xff00) != 0) { + first = (code >> 8) & 0xff; + } + else { + return (int )code; + } + return first; +} +#endif + +static int +eucjp_code_to_mbc(OnigCodePoint code, UChar *buf) +{ + UChar *p = buf; + + if ((code & 0xff0000) != 0) *p++ = (UChar )(((code >> 16) & 0xff)); + if ((code & 0xff00) != 0) *p++ = (UChar )(((code >> 8) & 0xff)); + *p++ = (UChar )(code & 0xff); + +#if 1 + if (enc_len(ONIG_ENCODING_EUC_JP, buf) != (p - buf)) + return ONIGENCERR_INVALID_WIDE_CHAR_VALUE; +#endif + return p - buf; +} + +static int +eucjp_mbc_to_normalize(OnigAmbigType flag, + const UChar** pp, const UChar* end, UChar* lower) +{ + int len; + const UChar* p = *pp; + + if (ONIGENC_IS_MBC_ASCII(p)) { + if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) { + *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); + } + else { + *lower = *p; + } + + (*pp)++; + return 1; + } + else { + len = enc_len(ONIG_ENCODING_EUC_JP, p); + if (lower != p) { + int i; + for (i = 0; i < len; i++) { + *lower++ = *p++; + } + } + (*pp) += len; + return len; /* return byte length of converted char to lower */ + } +} + +static int +eucjp_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) +{ + return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_EUC_JP, flag, pp, end); +} + +static int +eucjp_is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if ((ctype & ONIGENC_CTYPE_WORD) != 0) { + if (code < 128) + return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); + else + return (eucjp_code_to_mbclen(code) > 1 ? 
TRUE : FALSE); + + ctype &= ~ONIGENC_CTYPE_WORD; + if (ctype == 0) return FALSE; + } + + if (code < 128) + return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); + else + return FALSE; +} + +static UChar* +eucjp_left_adjust_char_head(const UChar* start, const UChar* s) +{ + /* In this encoding + mb-trail bytes doesn't mix with single bytes. + */ + const UChar *p; + int len; + + if (s <= start) return (UChar* )s; + p = s; + + while (!eucjp_islead(*p) && p > start) p--; + len = enc_len(ONIG_ENCODING_EUC_JP, p); + if (p + len > s) return (UChar* )p; + p += len; + return (UChar* )(p + ((s - p) & ~1)); +} + +static int +eucjp_is_allowed_reverse_match(const UChar* s, const UChar* end) +{ + const UChar c = *s; + if (c <= 0x7e || c == 0x8e || c == 0x8f) + return TRUE; + else + return FALSE; +} + +OnigEncodingType OnigEncodingEUC_JP = { + eucjp_mbc_enc_len, + "EUC-JP", /* name */ + 3, /* max enc length */ + 1, /* min enc length */ + ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE, + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' 
*/ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + }, + onigenc_is_mbc_newline_0x0a, + eucjp_mbc_to_code, + eucjp_code_to_mbclen, + eucjp_code_to_mbc, + eucjp_mbc_to_normalize, + eucjp_is_mbc_ambiguous, + onigenc_ascii_get_all_pair_ambig_codes, + onigenc_nothing_get_all_comp_ambig_codes, + eucjp_is_code_ctype, + onigenc_not_support_get_ctype_code_range, + eucjp_left_adjust_char_head, + eucjp_is_allowed_reverse_match +}; +/********************************************************************** + + eval.c - + + $Author: nobu $ + $Date: 2005/05/01 00:15:25 $ + created at: Thu Jun 10 14:22:17 JST 1993 + + Copyright (C) 1993-2003 Yukihiro Matsumoto + Copyright (C) 2000 Network Applied Communication Laboratory, Inc. + Copyright (C) 2000 Information-technology Promotion Agency, Japan + +**********************************************************************/ + +#include "ruby.h" +#include "node.h" +#include "env.h" +#include "util.h" +#include "rubysig.h" + +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif +#ifndef EXIT_SUCCESS +#define EXIT_SUCCESS 0 +#endif +#ifndef EXIT_FAILURE +#define EXIT_FAILURE 1 +#endif + +#include <stdio.h> +#if defined(HAVE_GETCONTEXT) && defined(HAVE_SETCONTEXT) +#include <ucontext.h> +#define USE_CONTEXT +#else +#include <setjmp.h> +#endif + +#include "st.h" +#include "dln.h" + +#ifdef __APPLE__ +#include <crt_externs.h> +#endif + +/* Make alloca work the best possible way. 
*/ +#ifdef __GNUC__ +# ifndef atarist +# ifndef alloca +# define alloca __builtin_alloca +# endif +# endif /* atarist */ +#else +# ifdef HAVE_ALLOCA_H +# include <alloca.h> +# else +# ifdef _AIX + #pragma alloca +# else +# ifndef alloca /* predefined by HP cc +Olibcalls */ +void *alloca (); +# endif +# endif /* AIX */ +# endif /* HAVE_ALLOCA_H */ +#endif /* __GNUC__ */ + +#ifdef HAVE_STDARG_PROTOTYPES +#include <stdarg.h> +#define va_init_list(a,b) va_start(a,b) +#else +#include <varargs.h> +#define va_init_list(a,b) va_start(a) +#endif + +#ifndef HAVE_STRING_H +char *strrchr _((const char*,const char)); +#endif + +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif + +#ifdef __BEOS__ +#include <net/socket.h> +#endif + +#ifdef __MACOS__ +#include "macruby_private.h" +#endif + +#ifdef USE_CONTEXT +typedef struct { + ucontext_t context; + volatile int status; +} rb_jmpbuf_t[1]; + +#undef longjmp +#undef setjmp +NORETURN(static void rb_jump_context(rb_jmpbuf_t, int)); +static inline void +rb_jump_context(env, val) + rb_jmpbuf_t env; + int val; +{ + env->status = val; + setcontext(&env->context); + abort(); /* ensure noreturn */ +} +#define longjmp(env, val) rb_jump_context(env, val) +#define setjmp(j) ((j)->status = 0, getcontext(&(j)->context), (j)->status) +#else +typedef jmp_buf rb_jmpbuf_t; +#ifndef setjmp +#ifdef HAVE__SETJMP +#define setjmp(env) _setjmp(env) +#define longjmp(env,val) _longjmp(env,val) +#endif +#endif +#endif + +#include <sys/types.h> +#include <signal.h> +#include <errno.h> + +#if defined(__VMS) +#pragma nostandard +#endif + +#ifdef HAVE_SYS_SELECT_H +#include <sys/select.h> +#endif + +#include <sys/stat.h> + +VALUE rb_cProc; +static VALUE rb_cBinding; +static VALUE proc_invoke _((VALUE,VALUE,VALUE,VALUE)); +static VALUE rb_f_binding _((VALUE)); +static void rb_f_END _((void)); +static VALUE rb_f_block_given_p _((void)); +static VALUE block_pass _((VALUE,NODE*)); +static VALUE rb_cMethod; +static VALUE method_call _((int, VALUE*, VALUE)); +static 
VALUE rb_cUnboundMethod; +static VALUE umethod_bind _((VALUE, VALUE)); +static VALUE rb_mod_define_method _((int, VALUE*, VALUE)); +NORETURN(static void rb_raise_jump _((VALUE))); +static VALUE rb_make_exception _((int argc, VALUE *argv)); + +static int scope_vmode; +#define SCOPE_PUBLIC 0 +#define SCOPE_PRIVATE 1 +#define SCOPE_PROTECTED 2 +#define SCOPE_MODFUNC 5 +#define SCOPE_MASK 7 +#define SCOPE_SET(f) (scope_vmode=(f)) +#define SCOPE_TEST(f) (scope_vmode&(f)) + +NODE* ruby_current_node; +int ruby_safe_level = 0; +/* safe-level: + 0 - strings from streams/environment/ARGV are tainted (default) + 1 - no dangerous operation by tainted value + 2 - process/file operations prohibited + 3 - all generated objects are tainted + 4 - no global (non-tainted) variable modification/no direct output +*/ + +static VALUE safe_getter _((void)); +static void safe_setter _((VALUE val)); + +void +rb_secure(level) + int level; +{ + if (level <= ruby_safe_level) { + if (ruby_frame->callee) { + rb_raise(rb_eSecurityError, "Insecure operation `%s' at level %d", + rb_id2name(ruby_frame->callee), ruby_safe_level); + } + else { + rb_raise(rb_eSecurityError, "Insecure operation at level %d", ruby_safe_level); + } + } +} + +void +rb_secure_update(obj) + VALUE obj; +{ + if (!OBJ_TAINTED(obj)) rb_secure(4); +} + +void +rb_check_safe_obj(x) + VALUE x; +{ + if (ruby_safe_level > 0 && OBJ_TAINTED(x)){ + if (ruby_frame->callee) { + rb_raise(rb_eSecurityError, "Insecure operation - %s", + rb_id2name(ruby_frame->callee)); + } + else { + rb_raise(rb_eSecurityError, "Insecure operation: -r"); + } + } + rb_secure(4); +} + +void +rb_check_safe_str(x) + VALUE x; +{ + rb_check_safe_obj(x); + if (TYPE(x)!= T_STRING) { + rb_raise(rb_eTypeError, "wrong argument type %s (expected String)", + rb_obj_classname(x)); + } +} + +NORETURN(static void print_undef _((VALUE, ID))); +static void +print_undef(klass, id) + VALUE klass; + ID id; +{ + rb_name_error(id, "undefined method `%s' for %s `%s'", + 
rb_id2name(id), + (TYPE(klass) == T_MODULE) ? "module" : "class", + rb_class2name(klass)); +} + +static ID removed, singleton_removed, undefined, singleton_undefined; + +#define CACHE_SIZE 0x800 +#define CACHE_MASK 0x7ff +#define EXPR1(c,m) ((((c)>>3)^(m))&CACHE_MASK) + +struct cache_entry { /* method hash table. */ + ID mid; /* method's id */ + ID mid0; /* method's original id */ + VALUE klass; /* receiver's class */ + VALUE origin; /* where method defined */ + NODE *method; + int noex; +}; + +static struct cache_entry cache[CACHE_SIZE]; +static int ruby_running = 0; + +void +rb_clear_cache() +{ + struct cache_entry *ent, *end; + + if (!ruby_running) return; + ent = cache; end = ent + CACHE_SIZE; + while (ent < end) { + ent->mid = 0; + ent++; + } +} + +static void +rb_clear_cache_for_undef(klass, id) + VALUE klass; + ID id; +{ + struct cache_entry *ent, *end; + + if (!ruby_running) return; + ent = cache; end = ent + CACHE_SIZE; + while (ent < end) { + if (ent->origin == klass && ent->mid == id) { + ent->mid = 0; + } + ent++; + } +} + +static void +rb_clear_cache_by_id(id) + ID id; +{ + struct cache_entry *ent, *end; + + if (!ruby_running) return; + ent = cache; end = ent + CACHE_SIZE; + while (ent < end) { + if (ent->mid == id) { + ent->mid = 0; + } + ent++; + } +} + +void +rb_clear_cache_by_class(klass) + VALUE klass; +{ + struct cache_entry *ent, *end; + + if (!ruby_running) return; + ent = cache; end = ent + CACHE_SIZE; + while (ent < end) { + if (ent->klass == klass || ent->origin == klass) { + ent->mid = 0; + } + ent++; + } +} + +static ID init, eqq, each, aref, aset, match, missing; +static ID added, singleton_added; +static ID __id__, __send__, respond_to; + +void +rb_add_method(klass, mid, node, noex) + VALUE klass; + ID mid; + NODE *node; + int noex; +{ + NODE *body; + + if (NIL_P(klass)) klass = rb_cObject; + if (ruby_safe_level >= 4 && (klass == rb_cObject || !OBJ_TAINTED(klass))) { + rb_raise(rb_eSecurityError, "Insecure: can't define method"); + } + 
if (!FL_TEST(klass, FL_SINGLETON) && + node && nd_type(node) != NODE_ZSUPER && + (mid == rb_intern("initialize" )|| mid == rb_intern("initialize_copy"))) { + noex = NOEX_PRIVATE | noex; + } + else if (FL_TEST(klass, FL_SINGLETON) && node && nd_type(node) == NODE_CFUNC && + mid == rb_intern("allocate")) { + rb_warn("defining %s.allocate is deprecated; use rb_define_alloc_func()", + rb_class2name(rb_iv_get(klass, "__attached__"))); + mid = ID_ALLOCATOR; + } + if (OBJ_FROZEN(klass)) rb_error_frozen("class/module"); + rb_clear_cache_by_id(mid); + body = NEW_METHOD(node, noex); + st_insert(RCLASS(klass)->m_tbl, mid, (st_data_t)body); + if (node && mid != ID_ALLOCATOR && ruby_running) { + if (FL_TEST(klass, FL_SINGLETON)) { + rb_funcall(rb_iv_get(klass, "__attached__"), singleton_added, 1, ID2SYM(mid)); + } + else { + rb_funcall(klass, added, 1, ID2SYM(mid)); + } + } +} + +void +rb_define_alloc_func(klass, func) + VALUE klass; + VALUE (*func) _((VALUE)); +{ + Check_Type(klass, T_CLASS); + rb_add_method(CLASS_OF(klass), ID_ALLOCATOR, NEW_CFUNC(func, 0), NOEX_PRIVATE); +} + +void +rb_undef_alloc_func(klass) + VALUE klass; +{ + Check_Type(klass, T_CLASS); + rb_add_method(CLASS_OF(klass), ID_ALLOCATOR, 0, NOEX_UNDEF); +} + +static NODE* +search_method(klass, id, origin) + VALUE klass, *origin; + ID id; +{ + NODE *body; + + if (!klass) return 0; + while (!st_lookup(RCLASS(klass)->m_tbl, id, (st_data_t *)&body)) { + klass = RCLASS(klass)->super; + if (!klass) return 0; + } + + if (origin) *origin = klass; + return body; +} + +static NODE* +rb_get_method_body(klassp, idp, noexp) + VALUE *klassp; + ID *idp; + int *noexp; +{ + ID id = *idp; + VALUE klass = *klassp; + VALUE origin; + NODE * volatile body; + struct cache_entry *ent; + + if ((body = search_method(klass, id, &origin)) == 0 || !body->nd_body) { + /* store empty info in cache */ + ent = cache + EXPR1(klass, id); + ent->klass = klass; + ent->origin = klass; + ent->mid = ent->mid0 = id; + ent->noex = 0; + ent->method = 
0; + + return 0; + } + + if (ruby_running) { + /* store in cache */ + ent = cache + EXPR1(klass, id); + ent->klass = klass; + ent->noex = body->nd_noex; + if (noexp) *noexp = body->nd_noex; + body = body->nd_body; + if (nd_type(body) == NODE_FBODY) { + ent->mid = id; + *klassp = body->nd_orig; + ent->origin = body->nd_orig; + *idp = ent->mid0 = body->nd_mid; + body = ent->method = body->nd_head; + } + else { + *klassp = origin; + ent->origin = origin; + ent->mid = ent->mid0 = id; + ent->method = body; + } + } + else { + if (noexp) *noexp = body->nd_noex; + body = body->nd_body; + if (nd_type(body) == NODE_FBODY) { + *klassp = body->nd_orig; + *idp = body->nd_mid; + body = body->nd_head; + } + else { + *klassp = origin; + } + } + + return body; +} + +NODE* +rb_method_node(klass, id) + VALUE klass; + ID id; +{ + int noex; + struct cache_entry *ent; + + ent = cache + EXPR1(klass, id); + if (ent->mid == id && ent->klass == klass && ent->method){ + return ent->method; + } + + return rb_get_method_body(&klass, &id, &noex); +} + +static void +remove_method(klass, mid) + VALUE klass; + ID mid; +{ + NODE *body; + + if (klass == rb_cObject) { + rb_secure(4); + } + if (ruby_safe_level >= 4 && !OBJ_TAINTED(klass)) { + rb_raise(rb_eSecurityError, "Insecure: can't remove method"); + } + if (OBJ_FROZEN(klass)) rb_error_frozen("class/module"); + if (mid == __id__ || mid == __send__ || mid == init) { + rb_warn("removing `%s' may cause serious problem", rb_id2name(mid)); + } + if (!st_delete(RCLASS(klass)->m_tbl, &mid, (st_data_t *)&body) || + !body->nd_body) { + rb_name_error(mid, "method `%s' not defined in %s", + rb_id2name(mid), rb_class2name(klass)); + } + rb_clear_cache_for_undef(klass, mid); + if (FL_TEST(klass, FL_SINGLETON)) { + rb_funcall(rb_iv_get(klass, "__attached__"), singleton_removed, 1, ID2SYM(mid)); + } + else { + rb_funcall(klass, removed, 1, ID2SYM(mid)); + } +} + +void +rb_remove_method(klass, name) + VALUE klass; + const char *name; +{ + remove_method(klass, 
rb_intern(name)); +} + +/* + * call-seq: + * remove_method(symbol) => self + * + * Removes the method identified by _symbol_ from the current + * class. For an example, see <code>Module.undef_method</code>. + */ + +static VALUE +rb_mod_remove_method(argc, argv, mod) + int argc; + VALUE *argv; + VALUE mod; +{ + int i; + + for (i=0; i<argc; i++) { + remove_method(mod, rb_to_id(argv[i])); + } + return mod; +} + +#undef rb_disable_super +#undef rb_enable_super + +void +rb_disable_super(klass, name) + VALUE klass; + const char *name; +{ + /* obsolete - no use */ +} + +void +rb_enable_super(klass, name) + VALUE klass; + const char *name; +{ + rb_warning("rb_enable_super() is obsolete"); +} + +static void +rb_export_method(klass, name, noex) + VALUE klass; + ID name; + ID noex; +{ + NODE *body; + VALUE origin; + + if (klass == rb_cObject) { + rb_secure(4); + } + body = search_method(klass, name, &origin); + if (!body && TYPE(klass) == T_MODULE) { + body = search_method(rb_cObject, name, &origin); + } + if (!body || !body->nd_body) { + print_undef(klass, name); + } + if (body->nd_noex != noex) { + if (klass == origin) { + body->nd_noex = noex; + } + else { + rb_add_method(klass, name, NEW_ZSUPER(), noex); + } + } +} + +int +rb_method_boundp(klass, id, ex) + VALUE klass; + ID id; + int ex; +{ + struct cache_entry *ent; + int noex; + + /* is it in the method cache? 
*/ + ent = cache + EXPR1(klass, id); + if (ent->mid == id && ent->klass == klass) { + if (ex && (ent->noex & NOEX_PRIVATE)) + return Qfalse; + if (!ent->method) return Qfalse; + return Qtrue; + } + if (rb_get_method_body(&klass, &id, &noex)) { + if (ex && (noex & NOEX_PRIVATE)) + return Qfalse; + return Qtrue; + } + return Qfalse; +} + +void +rb_attr(klass, id, read, write, ex) + VALUE klass; + ID id; + int read, write, ex; +{ + const char *name; + char *buf; + ID attriv; + int noex; + + if (!ex) noex = NOEX_PUBLIC; + else { + if (SCOPE_TEST(SCOPE_PRIVATE)) { + noex = NOEX_PRIVATE; + rb_warning((scope_vmode == SCOPE_MODFUNC) ? + "attribute accessor as module_function" : + "private attribute?"); + } + else if (SCOPE_TEST(SCOPE_PROTECTED)) { + noex = NOEX_PROTECTED; + } + else { + noex = NOEX_PUBLIC; + } + } + + if (!rb_is_local_id(id) && !rb_is_const_id(id)) { + rb_name_error(id, "invalid attribute name `%s'", rb_id2name(id)); + } + name = rb_id2name(id); + if (!name) { + rb_raise(rb_eArgError, "argument needs to be symbol or string"); + } + buf = ALLOCA_N(char,strlen(name)+2); + sprintf(buf, "@%s", name); + attriv = rb_intern(buf); + if (read) { + rb_add_method(klass, id, NEW_IVAR(attriv), noex); + } + if (write) { + rb_add_method(klass, rb_id_attrset(id), NEW_ATTRSET(attriv), noex); + } +} + +VALUE ruby_errinfo = Qnil; +extern int ruby_nerrs; + +static VALUE rb_eLocalJumpError; +static VALUE rb_eSysStackError; + +extern VALUE ruby_top_self; + +struct FRAME *ruby_frame; +struct SCOPE *ruby_scope; +static struct FRAME *top_frame; +static struct SCOPE *top_scope; + +static unsigned long frame_unique = 0; + +#define PUSH_FRAME() do { \ + struct FRAME _frame; \ + _frame.prev = ruby_frame; \ + _frame.tmp = 0; \ + _frame.node = ruby_current_node; \ + _frame.iter = ruby_iter->iter; \ + _frame.argc = 0; \ + _frame.flags = 0; \ + _frame.uniq = frame_unique++; \ + ruby_frame = &_frame + +#define POP_FRAME() \ + ruby_current_node = _frame.node; \ + ruby_frame = _frame.prev; \ 
+} while (0) + +struct BLOCK { + NODE *var; + NODE *body; + VALUE self; + struct FRAME frame; + struct SCOPE *scope; + VALUE klass; + NODE *cref; + int iter; + int vmode; + int flags; + int uniq; + struct RVarmap *dyna_vars; + VALUE orig_thread; + VALUE wrapper; + VALUE block_obj; + struct BLOCK *outer; + struct BLOCK *prev; +}; + +#define BLOCK_D_SCOPE 1 +#define BLOCK_LAMBDA 2 +#define BLOCK_FROM_METHOD 4 + +static struct BLOCK *ruby_block; +static unsigned long block_unique = 0; + +#define PUSH_BLOCK(v,b) do { \ + struct BLOCK _block; \ + _block.var = (v); \ + _block.body = (b); \ + _block.self = self; \ + _block.frame = *ruby_frame; \ + _block.klass = ruby_class; \ + _block.cref = ruby_cref; \ + _block.frame.node = ruby_current_node;\ + _block.scope = ruby_scope; \ + _block.prev = ruby_block; \ + _block.outer = ruby_block; \ + _block.iter = ruby_iter->iter; \ + _block.vmode = scope_vmode; \ + _block.flags = BLOCK_D_SCOPE; \ + _block.dyna_vars = ruby_dyna_vars; \ + _block.wrapper = ruby_wrapper; \ + _block.block_obj = 0; \ + _block.uniq = (b)?block_unique++:0; \ + if (b) { \ + prot_tag->blkid = _block.uniq; \ + } \ + ruby_block = &_block + +#define POP_BLOCK() \ + ruby_block = _block.prev; \ +} while (0) + +struct RVarmap *ruby_dyna_vars; +#define PUSH_VARS() do { \ + struct RVarmap * volatile _old; \ + _old = ruby_dyna_vars; \ + ruby_dyna_vars = 0 + +#define POP_VARS() \ + if (_old && (ruby_scope->flags & SCOPE_DONT_RECYCLE)) {\ + if (RBASIC(_old)->flags) /* unless it's already recycled */ \ + FL_SET(_old, DVAR_DONT_RECYCLE); \ + }\ + ruby_dyna_vars = _old; \ +} while (0) + +#define DVAR_DONT_RECYCLE FL_USER2 + +static struct RVarmap* +new_dvar(id, value, prev) + ID id; + VALUE value; + struct RVarmap *prev; +{ + NEWOBJ(vars, struct RVarmap); + OBJSETUP(vars, 0, T_VARMAP); + vars->id = id; + vars->val = value; + vars->next = prev; + + return vars; +} + +VALUE +rb_dvar_defined(id) + ID id; +{ + struct RVarmap *vars = ruby_dyna_vars; + + while (vars) { + if 
(vars->id == id) return Qtrue; + vars = vars->next; + } + return Qfalse; +} + +VALUE +rb_dvar_curr(id) + ID id; +{ + struct RVarmap *vars = ruby_dyna_vars; + + while (vars) { + if (vars->id == 0) break; + if (vars->id == id) return Qtrue; + vars = vars->next; + } + return Qfalse; +} + +VALUE +rb_dvar_ref(id) + ID id; +{ + struct RVarmap *vars = ruby_dyna_vars; + + while (vars) { + if (vars->id == id) { + return vars->val; + } + vars = vars->next; + } + return Qnil; +} + +void +rb_dvar_push(id, value) + ID id; + VALUE value; +{ + ruby_dyna_vars = new_dvar(id, value, ruby_dyna_vars); +} + +static void +dvar_asgn_internal(id, value, curr) + ID id; + VALUE value; + int curr; +{ + int n = 0; + struct RVarmap *vars = ruby_dyna_vars; + + while (vars) { + if (curr && vars->id == 0) { + /* first null is a dvar header */ + n++; + if (n == 2) break; + } + if (vars->id == id) { + vars->val = value; + return; + } + vars = vars->next; + } + if (!ruby_dyna_vars) { + ruby_dyna_vars = new_dvar(id, value, 0); + } + else { + vars = new_dvar(id, value, ruby_dyna_vars->next); + ruby_dyna_vars->next = vars; + } +} + +static inline void +dvar_asgn(id, value) + ID id; + VALUE value; +{ + dvar_asgn_internal(id, value, 0); +} + +static inline void +dvar_asgn_curr(id, value) + ID id; + VALUE value; +{ + dvar_asgn_internal(id, value, 1); +} + +VALUE * +rb_svar(cnt) + int cnt; +{ + struct RVarmap *vars = ruby_dyna_vars; + ID id; + + if (!ruby_scope->local_tbl) return NULL; + if (cnt >= ruby_scope->local_tbl[0]) return NULL; + id = ruby_scope->local_tbl[cnt+1]; + while (vars) { + if (vars->id == id) return &vars->val; + vars = vars->next; + } + if (ruby_scope->local_vars == 0) return NULL; + return &ruby_scope->local_vars[cnt]; +} + +struct iter { + int iter; + struct iter *prev; +}; +static struct iter *ruby_iter; + +#define ITER_NOT 0 +#define ITER_PRE 1 +#define ITER_CUR 2 + +#define PUSH_ITER(i) do { \ + struct iter _iter; \ + _iter.prev = ruby_iter; \ + _iter.iter = (i); \ + ruby_iter = 
&_iter + +#define POP_ITER() \ + ruby_iter = _iter.prev; \ +} while (0) + +struct tag { + rb_jmpbuf_t buf; + struct FRAME *frame; + struct iter *iter; + VALUE tag; + VALUE retval; + struct SCOPE *scope; + VALUE dst; + struct tag *prev; + int blkid; +}; +static struct tag *prot_tag; + +#define PUSH_TAG(ptag) do { \ + struct tag _tag; \ + _tag.retval = Qnil; \ + _tag.frame = ruby_frame; \ + _tag.iter = ruby_iter; \ + _tag.prev = prot_tag; \ + _tag.scope = ruby_scope; \ + _tag.tag = ptag; \ + _tag.dst = 0; \ + _tag.blkid = 0; \ + prot_tag = &_tag + +#define PROT_NONE Qfalse /* 0 */ +#define PROT_THREAD Qtrue /* 2 */ +#define PROT_FUNC INT2FIX(0) /* 1 */ +#define PROT_LOOP INT2FIX(1) /* 3 */ +#define PROT_LAMBDA INT2FIX(2) /* 5 */ +#define PROT_YIELD INT2FIX(3) /* 7 */ +#define PROT_TOP INT2FIX(4) /* 9 */ + +#define EXEC_TAG() (FLUSH_REGISTER_WINDOWS, setjmp(prot_tag->buf)) + +#define JUMP_TAG(st) do { \ + ruby_frame = prot_tag->frame; \ + ruby_iter = prot_tag->iter; \ + longjmp(prot_tag->buf,(st)); \ +} while (0) + +#define POP_TAG() \ + prot_tag = _tag.prev; \ +} while (0) + +#define TAG_DST() (_tag.dst == (VALUE)ruby_frame->uniq) + +#define TAG_RETURN 0x1 +#define TAG_BREAK 0x2 +#define TAG_NEXT 0x3 +#define TAG_RETRY 0x4 +#define TAG_REDO 0x5 +#define TAG_RAISE 0x6 +#define TAG_THROW 0x7 +#define TAG_FATAL 0x8 +#define TAG_CONTCALL 0x9 +#define TAG_THREAD 0xa +#define TAG_MASK 0xf + +VALUE ruby_class; +static VALUE ruby_wrapper; /* security wrapper */ + +#define PUSH_CLASS(c) do { \ + VALUE _class = ruby_class; \ + ruby_class = (c) + +#define POP_CLASS() ruby_class = _class; \ +} while (0) + +static NODE *ruby_cref = 0; +static NODE *top_cref; +#define PUSH_CREF(c) ruby_cref = NEW_NODE(NODE_CREF,(c),0,ruby_cref) +#define POP_CREF() ruby_cref = ruby_cref->nd_next + +#define PUSH_SCOPE() do { \ + volatile int _vmode = scope_vmode; \ + struct SCOPE * volatile _old; \ + NEWOBJ(_scope, struct SCOPE); \ + OBJSETUP(_scope, 0, T_SCOPE); \ + _scope->local_tbl = 0; \ + 
_scope->local_vars = 0; \ + _scope->flags = 0; \ + _old = ruby_scope; \ + ruby_scope = _scope; \ + scope_vmode = SCOPE_PUBLIC + +typedef struct thread * rb_thread_t; +static rb_thread_t curr_thread = 0; +static rb_thread_t main_thread; +static void scope_dup _((struct SCOPE *)); + +#define POP_SCOPE() \ + if (ruby_scope->flags & SCOPE_DONT_RECYCLE) {\ + if (_old) scope_dup(_old); \ + } \ + if (!(ruby_scope->flags & SCOPE_MALLOC)) {\ + ruby_scope->local_vars = 0; \ + ruby_scope->local_tbl = 0; \ + if (!(ruby_scope->flags & SCOPE_DONT_RECYCLE) && \ + ruby_scope != top_scope) { \ + rb_gc_force_recycle((VALUE)ruby_scope);\ + } \ + } \ + ruby_scope->flags |= SCOPE_NOSTACK; \ + ruby_scope = _old; \ + scope_vmode = _vmode; \ +} while (0) + +struct ruby_env { + struct ruby_env *prev; + struct FRAME *frame; + struct SCOPE *scope; + struct BLOCK *block; + struct iter *iter; + struct tag *tag; + NODE *cref; +}; + +static void push_thread_anchor _((struct ruby_env *)); +static void pop_thread_anchor _((struct ruby_env *)); + +#define PUSH_THREAD_TAG() PUSH_TAG(PROT_THREAD); \ + do { \ + struct ruby_env _interp; \ + push_thread_anchor(&_interp); +#define POP_THREAD_TAG() \ + pop_thread_anchor(&_interp); \ + } while (0); \ + POP_TAG() + +static VALUE rb_eval _((VALUE,NODE*)); +static VALUE eval _((VALUE,VALUE,VALUE,char*,int)); +static NODE *compile _((VALUE, char*, int)); + +static VALUE rb_yield_0 _((VALUE, VALUE, VALUE, int, int)); + +#define YIELD_LAMBDA_CALL 1 +#define YIELD_PROC_CALL 2 +#define YIELD_PUBLIC_DEF 4 +#define YIELD_FUNC_AVALUE 1 +#define YIELD_FUNC_SVALUE 2 + +static VALUE rb_call _((VALUE,VALUE,ID,int,const VALUE*,int)); +static VALUE module_setup _((VALUE,NODE*)); + +static VALUE massign _((VALUE,NODE*,VALUE,int)); +static void assign _((VALUE,NODE*,VALUE,int)); + +typedef struct event_hook { + rb_event_hook_func_t func; + rb_event_t events; + struct event_hook *next; +} rb_event_hook_t; + +static rb_event_hook_t *event_hooks; + +#define 
EXEC_EVENT_HOOK(event, node, self, id, klass) \ + do { \ + rb_event_hook_t *hook; \ + \ + for (hook = event_hooks; hook; hook = hook->next) { \ + if (hook->events & event) \ + (*hook->func)(event, node, self, id, klass); \ + } \ + } while (0) + +static VALUE trace_func = 0; +static int tracing = 0; +static void call_trace_func _((rb_event_t,NODE*,VALUE,ID,VALUE)); + +#if 0 +#define SET_CURRENT_SOURCE() (ruby_sourcefile = ruby_current_node->nd_file, \ + ruby_sourceline = nd_line(ruby_current_node)) +#else +#define SET_CURRENT_SOURCE() ((void)0) +#endif + +void +ruby_set_current_source() +{ + if (ruby_current_node) { + ruby_sourcefile = ruby_current_node->nd_file; + ruby_sourceline = nd_line(ruby_current_node); + } +} + +static void +#ifdef HAVE_STDARG_PROTOTYPES +warn_printf(const char *fmt, ...) +#else +warn_printf(fmt, va_alist) + const char *fmt; + va_dcl +#endif +{ + char buf[BUFSIZ]; + va_list args; + + va_init_list(args, fmt); + vsnprintf(buf, BUFSIZ, fmt, args); + va_end(args); + rb_write_error(buf); +} + +#define warn_print(x) rb_write_error(x) +#define warn_print2(x,l) rb_write_error2(x,l) + +static void +error_pos() +{ + ruby_set_current_source(); + if (ruby_sourcefile) { + if (ruby_frame->callee) { + warn_printf("%s:%d:in `%s'", ruby_sourcefile, ruby_sourceline, + rb_id2name(ruby_frame->callee)); + } + else if (ruby_sourceline == 0) { + warn_printf("%s", ruby_sourcefile); + } + else { + warn_printf("%s:%d", ruby_sourcefile, ruby_sourceline); + } + } +} + +static VALUE +get_backtrace(info) + VALUE info; +{ + if (NIL_P(info)) return Qnil; + info = rb_funcall(info, rb_intern("backtrace"), 0); + if (NIL_P(info)) return Qnil; + return rb_check_array_type(info); +} + +static void +set_backtrace(info, bt) + VALUE info, bt; +{ + rb_funcall(info, rb_intern("set_backtrace"), 1, bt); +} + +static void +error_print() +{ + VALUE errat = Qnil; /* OK */ + volatile VALUE eclass, e; + char *einfo; + long elen; + + if (NIL_P(ruby_errinfo)) return; + + PUSH_TAG(PROT_NONE); 
+ if (EXEC_TAG() == 0) { + errat = get_backtrace(ruby_errinfo); + } + else { + errat = Qnil; + } + if (EXEC_TAG()) goto error; + if (NIL_P(errat)){ + ruby_set_current_source(); + if (ruby_sourcefile) + warn_printf("%s:%d", ruby_sourcefile, ruby_sourceline); + else + warn_printf("%d", ruby_sourceline); + } + else if (RARRAY(errat)->len == 0) { + error_pos(); + } + else { + VALUE mesg = RARRAY(errat)->ptr[0]; + + if (NIL_P(mesg)) error_pos(); + else { + warn_print2(RSTRING(mesg)->ptr, RSTRING(mesg)->len); + } + } + + eclass = CLASS_OF(ruby_errinfo); + if (EXEC_TAG() == 0) { + e = rb_funcall(ruby_errinfo, rb_intern("message"), 0, 0); + StringValue(e); + einfo = RSTRING(e)->ptr; + elen = RSTRING(e)->len; + } + else { + einfo = ""; + elen = 0; + } + if (EXEC_TAG()) goto error; + if (eclass == rb_eRuntimeError && elen == 0) { + warn_print(": unhandled exception\n"); + } + else { + VALUE epath; + + epath = rb_class_name(eclass); + if (elen == 0) { + warn_print(": "); + warn_print2(RSTRING(epath)->ptr, RSTRING(epath)->len); + warn_print("\n"); + } + else { + char *tail = 0; + long len = elen; + + if (RSTRING(epath)->ptr[0] == '#') epath = 0; + if (tail = memchr(einfo, '\n', elen)) { + len = tail - einfo; + tail++; /* skip newline */ + } + warn_print(": "); + warn_print2(einfo, len); + if (epath) { + warn_print(" ("); + warn_print2(RSTRING(epath)->ptr, RSTRING(epath)->len); + warn_print(")\n"); + } + if (tail) { + warn_print2(tail, elen-len-1); + } + } + } + + if (!NIL_P(errat)) { + long i; + struct RArray *ep = RARRAY(errat); + +#define TRACE_MAX (TRACE_HEAD+TRACE_TAIL+5) +#define TRACE_HEAD 8 +#define TRACE_TAIL 5 + + ep = RARRAY(errat); + for (i=1; i<ep->len; i++) { + if (TYPE(ep->ptr[i]) == T_STRING) { + warn_printf("\tfrom %s\n", RSTRING(ep->ptr[i])->ptr); + } + if (i == TRACE_HEAD && ep->len > TRACE_MAX) { + warn_printf("\t ... 
%ld levels...\n", + ep->len - TRACE_HEAD - TRACE_TAIL); + i = ep->len - TRACE_TAIL; + } + } + } + error: + POP_TAG(); +} + +#if defined(__APPLE__) +#define environ (*_NSGetEnviron()) +#elif !defined(_WIN32) && !defined(__MACOS__) || defined(_WIN32_WCE) +extern char **environ; +#endif +char **rb_origenviron; + +void rb_call_inits _((void)); +void Init_stack _((VALUE*)); +void Init_heap _((void)); +void Init_ext _((void)); + +#ifdef HAVE_NATIVETHREAD +static rb_nativethread_t ruby_thid; +int +is_ruby_native_thread() +{ + return NATIVETHREAD_EQUAL(ruby_thid, NATIVETHREAD_CURRENT()); +} + +# ifdef HAVE_NATIVETHREAD_KILL +void +ruby_native_thread_kill(sig) + int sig; +{ + NATIVETHREAD_KILL(ruby_thid, sig); +} +# endif +#endif + +NORETURN(static void rb_thread_start_1 _((void))); + +void +ruby_init() +{ + static int initialized = 0; + static struct FRAME frame; + static struct iter iter; + int state; + + if (initialized) + return; + initialized = 1; +#ifdef HAVE_NATIVETHREAD + ruby_thid = NATIVETHREAD_CURRENT(); +#endif + + ruby_frame = top_frame = &frame; + ruby_iter = &iter; + +#ifdef __MACOS__ + rb_origenviron = 0; +#else + rb_origenviron = environ; +#endif + + Init_stack((void*)&state); + Init_heap(); + PUSH_SCOPE(); + top_scope = ruby_scope; + /* default visibility is private at toplevel */ + SCOPE_SET(SCOPE_PRIVATE); + + PUSH_TAG(PROT_NONE); + if ((state = EXEC_TAG()) == 0) { + rb_call_inits(); + ruby_class = rb_cObject; + ruby_frame->self = ruby_top_self; + top_cref = rb_node_newnode(NODE_CREF,rb_cObject,0,0); + ruby_cref = top_cref; + rb_define_global_const("TOPLEVEL_BINDING", rb_f_binding(ruby_top_self)); +#ifdef __MACOS__ + _macruby_init(); +#endif + ruby_prog_init(); + ALLOW_INTS; + } + POP_TAG(); + if (state) { + error_print(); + exit(EXIT_FAILURE); + } + POP_SCOPE(); + ruby_scope = top_scope; + top_scope->flags &= ~SCOPE_NOSTACK; + ruby_running = 1; +} + +static VALUE +eval_node(self, node) + VALUE self; + NODE *node; +{ + if (!node) return Qnil; + if 
(nd_type(node) == NODE_PRELUDE) { + rb_eval(self, node->nd_head); + node = node->nd_body; + } + if (!node) return Qnil; + return rb_eval(self, node); +} + +int ruby_in_eval; + +static void rb_thread_cleanup _((void)); +static void rb_thread_wait_other_threads _((void)); + +static int thread_set_raised(); +static int thread_reset_raised(); + +static VALUE exception_error; +static VALUE sysstack_error; + +static int +error_handle(ex) + int ex; +{ + int status = EXIT_FAILURE; + + if (thread_set_raised()) return EXIT_FAILURE; + switch (ex & TAG_MASK) { + case 0: + status = EXIT_SUCCESS; + break; + + case TAG_RETURN: + error_pos(); + warn_print(": unexpected return\n"); + break; + case TAG_NEXT: + error_pos(); + warn_print(": unexpected next\n"); + break; + case TAG_BREAK: + error_pos(); + warn_print(": unexpected break\n"); + break; + case TAG_REDO: + error_pos(); + warn_print(": unexpected redo\n"); + break; + case TAG_RETRY: + error_pos(); + warn_print(": retry outside of rescue clause\n"); + break; + case TAG_THROW: + if (prot_tag && prot_tag->frame && prot_tag->frame->node) { + NODE *tag = prot_tag->frame->node; + warn_printf("%s:%d: uncaught throw\n", + tag->nd_file, nd_line(tag)); + } + else { + error_pos(); + warn_printf(": unexpected throw\n"); + } + break; + case TAG_RAISE: + case TAG_FATAL: + if (rb_obj_is_kind_of(ruby_errinfo, rb_eSystemExit)) { + VALUE st = rb_iv_get(ruby_errinfo, "status"); + status = NUM2INT(st); + } + else { + error_print(); + } + break; + default: + rb_bug("Unknown longjmp status %d", ex); + break; + } + thread_reset_raised(); + return status; +} + +void +ruby_options(argc, argv) + int argc; + char **argv; +{ + int state; + +#ifdef _WIN32 + argc = rb_w32_cmdvector(GetCommandLine(), &argv); +#endif + + Init_stack((void*)&state); + PUSH_THREAD_TAG(); + if ((state = EXEC_TAG()) == 0) { + ruby_process_options(argc, argv); + } + else { + if (state == TAG_THREAD) { + rb_thread_start_1(); + } + trace_func = 0; + tracing = 0; + 
exit(error_handle(state)); + } + POP_THREAD_TAG(); + +#ifdef _WIN32_WCE + wce_FreeCommandLine(); +#endif +} + +void rb_exec_end_proc _((void)); + +static void +ruby_finalize_0() +{ + PUSH_TAG(PROT_NONE); + if (EXEC_TAG() == 0) { + rb_trap_exit(); + } + POP_TAG(); + rb_exec_end_proc(); +} + +static void +ruby_finalize_1() +{ + signal(SIGINT, SIG_DFL); + ruby_errinfo = 0; + rb_gc_call_finalizer_at_exit(); + trace_func = 0; + tracing = 0; +} + +void +ruby_finalize() +{ + ruby_finalize_0(); + ruby_finalize_1(); +} + +int +ruby_cleanup(ex) + int ex; +{ + int state; + volatile VALUE err = ruby_errinfo; + + ruby_safe_level = 0; + Init_stack((void*)&state); + PUSH_THREAD_TAG(); + PUSH_ITER(ITER_NOT); + if ((state = EXEC_TAG()) == 0) { + ruby_finalize_0(); + if (ruby_errinfo) err = ruby_errinfo; + rb_thread_cleanup(); + rb_thread_wait_other_threads(); + } + else if (state == TAG_THREAD) { + rb_thread_start_1(); + } + else if (ex == 0) { + ex = state; + } + POP_ITER(); + ruby_errinfo = err; + ex = error_handle(ex); + ruby_finalize_1(); + POP_THREAD_TAG(); + + if (err && rb_obj_is_kind_of(err, rb_eSystemExit)) { + VALUE st = rb_iv_get(err, "status"); + return NUM2INT(st); + } + return ex; +} + +extern NODE *ruby_eval_tree; + +static void cont_call _((VALUE)); + +static int +ruby_exec_internal() +{ + int state; + + PUSH_THREAD_TAG(); + PUSH_ITER(ITER_NOT); + /* default visibility is private at toplevel */ + SCOPE_SET(SCOPE_PRIVATE); + if ((state = EXEC_TAG()) == 0) { + eval_node(ruby_top_self, ruby_eval_tree); + } +#if 0 + else if (state == TAG_CONTCALL) { + cont_call(prot_tag->retval); + } +#endif + else if (state == TAG_THREAD) { + rb_thread_start_1(); + } + POP_ITER(); + POP_THREAD_TAG(); + return state; +} + +int +ruby_exec() +{ + volatile NODE *tmp; + + Init_stack((void*)&tmp); + return ruby_exec_internal(); +} + +void +ruby_stop(ex) + int ex; +{ + exit(ruby_cleanup(ex)); +} + +void +ruby_run() +{ + int state; + static int ex; + + if (ruby_nerrs > 0) exit(EXIT_FAILURE); + 
state = ruby_exec(); + if (state && !ex) ex = state; + ruby_stop(ex); +} + +static void +compile_error(at) + const char *at; +{ + VALUE str; + + ruby_nerrs = 0; + str = rb_str_buf_new2("compile error"); + if (at) { + rb_str_buf_cat2(str, " in "); + rb_str_buf_cat2(str, at); + } + rb_str_buf_cat(str, "\n", 1); + if (!NIL_P(ruby_errinfo)) { + rb_str_append(str, rb_obj_as_string(ruby_errinfo)); + } + rb_exc_raise(rb_exc_new3(rb_eSyntaxError, str)); +} + +VALUE +rb_eval_string(str) + const char *str; +{ + VALUE v; + NODE *oldsrc = ruby_current_node; + + ruby_current_node = 0; + ruby_sourcefile = rb_source_filename("(eval)"); + v = eval(ruby_top_self, rb_str_new2(str), Qnil, 0, 0); + ruby_current_node = oldsrc; + + return v; +} + +VALUE +rb_eval_string_protect(str, state) + const char *str; + int *state; +{ + return rb_protect((VALUE (*)_((VALUE)))rb_eval_string, (VALUE)str, state); +} + +VALUE +rb_eval_string_wrap(str, state) + const char *str; + int *state; +{ + int status; + VALUE self = ruby_top_self; + VALUE wrapper = ruby_wrapper; + VALUE val; + + PUSH_CLASS(ruby_wrapper = rb_module_new()); + ruby_top_self = rb_obj_clone(ruby_top_self); + rb_extend_object(ruby_top_self, ruby_wrapper); + PUSH_FRAME(); + ruby_frame->callee = 0; + ruby_frame->this_func = 0; + ruby_frame->this_class = 0; + ruby_frame->self = self; + PUSH_CREF(ruby_wrapper); + PUSH_SCOPE(); + + val = rb_eval_string_protect(str, &status); + ruby_top_self = self; + + POP_SCOPE(); + POP_FRAME(); + POP_CLASS(); + ruby_wrapper = wrapper; + if (state) { + *state = status; + } + else if (status) { + JUMP_TAG(status); + } + return val; +} + +NORETURN(static void localjump_error(const char*, VALUE, int)); +static void +localjump_error(mesg, value, reason) + const char *mesg; + VALUE value; + int reason; +{ + VALUE exc = rb_exc_new2(rb_eLocalJumpError, mesg); + ID id; + + rb_iv_set(exc, "@exit_value", value); + switch (reason) { + case TAG_BREAK: + id = rb_intern("break"); break; + case TAG_REDO: + id = 
rb_intern("redo"); break; + case TAG_RETRY: + id = rb_intern("retry"); break; + case TAG_NEXT: + id = rb_intern("next"); break; + case TAG_RETURN: + id = rb_intern("return"); break; + default: + id = rb_intern("noreason"); break; + } + rb_iv_set(exc, "@reason", ID2SYM(id)); + rb_exc_raise(exc); +} + +/* + * call_seq: + * local_jump_error.exit_value => obj + * + * Returns the exit value associated with this +LocalJumpError+. + */ +static VALUE +localjump_xvalue(exc) + VALUE exc; +{ + return rb_iv_get(exc, "@exit_value"); +} + +/* + * call-seq: + * local_jump_error.reason => symbol + * + * The reason this block was terminated: + * :break, :redo, :retry, :next, :return, or :noreason. + */ + +static VALUE +localjump_reason(exc) + VALUE exc; +{ + return rb_iv_get(exc, "@reason"); +} + +NORETURN(static void jump_tag_but_local_jump _((int,VALUE))); +static void +jump_tag_but_local_jump(state, val) + int state; + VALUE val; +{ + + if (val == Qundef) val = prot_tag->retval; + switch (state) { + case 0: + break; + case TAG_RETURN: + localjump_error("unexpected return", val, state); + break; + case TAG_BREAK: + localjump_error("unexpected break", val, state); + break; + case TAG_NEXT: + localjump_error("unexpected next", val, state); + break; + case TAG_REDO: + localjump_error("unexpected redo", Qnil, state); + break; + case TAG_RETRY: + localjump_error("retry outside of rescue clause", Qnil, state); + break; + default: + break; + } + JUMP_TAG(state); +} + +VALUE +rb_eval_cmd(cmd, arg, level) + VALUE cmd, arg; + int level; +{ + int state; + VALUE val = Qnil; /* OK */ + struct SCOPE *saved_scope; + volatile int safe = ruby_safe_level; + + if (OBJ_TAINTED(cmd)) { + level = 4; + } + if (TYPE(cmd) != T_STRING) { + PUSH_ITER(ITER_NOT); + PUSH_TAG(PROT_NONE); + ruby_safe_level = level; + if ((state = EXEC_TAG()) == 0) { + val = rb_funcall2(cmd, rb_intern("call"), RARRAY(arg)->len, RARRAY(arg)->ptr); + } + ruby_safe_level = safe; + POP_TAG(); + POP_ITER(); + if (state) 
JUMP_TAG(state); + return val; + } + + saved_scope = ruby_scope; + ruby_scope = top_scope; + PUSH_FRAME(); + ruby_frame->callee = 0; + ruby_frame->this_func = 0; + ruby_frame->this_class = 0; + ruby_frame->self = ruby_top_self; + PUSH_CREF(ruby_wrapper ? ruby_wrapper : rb_cObject); + + ruby_safe_level = level; + + PUSH_TAG(PROT_NONE); + if ((state = EXEC_TAG()) == 0) { + val = eval(ruby_top_self, cmd, Qnil, 0, 0); + } + if (ruby_scope->flags & SCOPE_DONT_RECYCLE) + scope_dup(saved_scope); + ruby_scope = saved_scope; + ruby_safe_level = safe; + POP_TAG(); + POP_FRAME(); + + jump_tag_but_local_jump(state, val); + return val; +} + +#define ruby_cbase (ruby_cref->nd_clss) + +static VALUE +ev_const_defined(cref, id, self) + NODE *cref; + ID id; + VALUE self; +{ + NODE *cbase = cref; + VALUE result; + + while (cbase && cbase->nd_next) { + struct RClass *klass = RCLASS(cbase->nd_clss); + + if (NIL_P(klass)) return rb_const_defined(CLASS_OF(self), id); + if (klass->iv_tbl && st_lookup(klass->iv_tbl, id, &result)) { + if (result == Qundef && NIL_P(rb_autoload_p((VALUE)klass, id))) { + return Qfalse; + } + return Qtrue; + } + cbase = cbase->nd_next; + } + return rb_const_defined(cref->nd_clss, id); +} + +static VALUE +ev_const_get(cref, id, self) + NODE *cref; + ID id; + VALUE self; +{ + NODE *cbase = cref; + VALUE result; + + while (cbase && cbase->nd_next) { + VALUE klass = cbase->nd_clss; + + if (NIL_P(klass)) return rb_const_get(CLASS_OF(self), id); + while (RCLASS(klass)->iv_tbl && st_lookup(RCLASS(klass)->iv_tbl, id, &result)) { + if (result == Qundef) { + rb_autoload_load(klass, id); + continue; + } + return result; + } + cbase = cbase->nd_next; + } + return rb_const_get(cref->nd_clss, id); +} + +static VALUE +cvar_cbase() +{ + NODE *cref = ruby_cref; + + while (cref && cref->nd_next && (NIL_P(cref->nd_clss) || FL_TEST(cref->nd_clss, FL_SINGLETON))) { + cref = cref->nd_next; + if (!cref->nd_next) { + rb_warn("class variable access from toplevel singleton method"); + } 
+ } + if (NIL_P(cref->nd_clss)) { + rb_raise(rb_eTypeError, "no class variables available"); + } + return cref->nd_clss; +} + +/* + * call-seq: + * Module.nesting => array + * + * Returns the list of +Modules+ nested at the point of call. + * + * module M1 + * module M2 + * $a = Module.nesting + * end + * end + * $a #=> [M1::M2, M1] + * $a[0].name #=> "M1::M2" + */ + +static VALUE +rb_mod_nesting() +{ + NODE *cbase = ruby_cref; + VALUE ary = rb_ary_new(); + + while (cbase && cbase->nd_next) { + if (!NIL_P(cbase->nd_clss)) rb_ary_push(ary, cbase->nd_clss); + cbase = cbase->nd_next; + } + if (ruby_wrapper && RARRAY(ary)->len == 0) { + rb_ary_push(ary, ruby_wrapper); + } + return ary; +} + +/* + * call-seq: + * Module.constants => array + * + * Returns an array of the names of all constants defined in the + * system. This list includes the names of all modules and classes. + * + * p Module.constants.sort[1..5] + * + * <em>produces:</em> + * + * ["ARGV", "ArgumentError", "Array", "Bignum", "Binding"] + */ + +static VALUE +rb_mod_s_constants() +{ + NODE *cbase = ruby_cref; + void *data = 0; + + while (cbase) { + if (!NIL_P(cbase->nd_clss)) { + data = rb_mod_const_at(cbase->nd_clss, data); + } + cbase = cbase->nd_next; + } + + if (!NIL_P(ruby_cbase)) { + data = rb_mod_const_of(ruby_cbase, data); + } + return rb_const_list(data); +} + +void +rb_frozen_class_p(klass) + VALUE klass; +{ + char *desc = "something(?!)"; + + if (OBJ_FROZEN(klass)) { + if (FL_TEST(klass, FL_SINGLETON)) + desc = "object"; + else { + switch (TYPE(klass)) { + case T_MODULE: + case T_ICLASS: + desc = "module"; break; + case T_CLASS: + desc = "class"; break; + } + } + rb_error_frozen(desc); + } +} + +void +rb_undef(klass, id) + VALUE klass; + ID id; +{ + VALUE origin; + NODE *body; + + if (ruby_cbase == rb_cObject && klass == rb_cObject) { + rb_secure(4); + } + if (ruby_safe_level >= 4 && !OBJ_TAINTED(klass)) { + rb_raise(rb_eSecurityError, "Insecure: can't undef `%s'", rb_id2name(id)); + } + 
rb_frozen_class_p(klass); + if (id == __id__ || id == __send__ || id == init) { + rb_warn("undefining `%s' may cause serious problem", rb_id2name(id)); + } + body = search_method(klass, id, &origin); + if (!body || !body->nd_body) { + char *s0 = " class"; + VALUE c = klass; + + if (FL_TEST(c, FL_SINGLETON)) { + VALUE obj = rb_iv_get(klass, "__attached__"); + + switch (TYPE(obj)) { + case T_MODULE: + case T_CLASS: + c = obj; + s0 = ""; + } + } + else if (TYPE(c) == T_MODULE) { + s0 = " module"; + } + rb_name_error(id, "undefined method `%s' for%s `%s'", + rb_id2name(id),s0,rb_class2name(c)); + } + rb_add_method(klass, id, 0, NOEX_PUBLIC); + if (FL_TEST(klass, FL_SINGLETON)) { + rb_funcall(rb_iv_get(klass, "__attached__"), + singleton_undefined, 1, ID2SYM(id)); + } + else { + rb_funcall(klass, undefined, 1, ID2SYM(id)); + } +} + +/* + * call-seq: + * undef_method(symbol) => self + * + * Prevents the current class from responding to calls to the named + * method. Contrast this with <code>remove_method</code>, which deletes + * the method from the particular class; Ruby will still search + * superclasses and mixed-in modules for a possible receiver. 
+ * + * class Parent + * def hello + * puts "In parent" + * end + * end + * class Child < Parent + * def hello + * puts "In child" + * end + * end + * + * + * c = Child.new + * c.hello + * + * + * class Child + * remove_method :hello # remove from child, still in parent + * end + * c.hello + * + * + * class Child + * undef_method :hello # prevent any calls to 'hello' + * end + * c.hello + * + * <em>produces:</em> + * + * In child + * In parent + * prog.rb:23: undefined method `hello' for #<Child:0x401b3bb4> (NoMethodError) + */ + +static VALUE +rb_mod_undef_method(argc, argv, mod) + int argc; + VALUE *argv; + VALUE mod; +{ + int i; + + for (i=0; i<argc; i++) { + rb_undef(mod, rb_to_id(argv[i])); + } + return mod; +} + +void +rb_alias(klass, name, def) + VALUE klass; + ID name, def; +{ + VALUE origin; + NODE *orig, *body, *node; + VALUE singleton = 0; + + rb_frozen_class_p(klass); + if (name == def) return; + if (klass == rb_cObject) { + rb_secure(4); + } + orig = search_method(klass, def, &origin); + if (!orig || !orig->nd_body) { + if (TYPE(klass) == T_MODULE) { + orig = search_method(rb_cObject, def, &origin); + } + } + if (!orig || !orig->nd_body) { + print_undef(klass, def); + } + if (FL_TEST(klass, FL_SINGLETON)) { + singleton = rb_iv_get(klass, "__attached__"); + } + body = orig->nd_body; + orig->nd_cnt++; + if (nd_type(body) == NODE_FBODY) { /* was alias */ + def = body->nd_mid; + origin = body->nd_orig; + body = body->nd_head; + } + + rb_clear_cache_by_id(name); + if (RTEST(ruby_verbose) && st_lookup(RCLASS(klass)->m_tbl, name, (st_data_t *)&node)) { + if (node->nd_cnt == 0 && node->nd_body) { + rb_warning("discarding old %s", rb_id2name(name)); + } + } + st_insert(RCLASS(klass)->m_tbl, name, + (st_data_t)NEW_METHOD(NEW_FBODY(body, def, origin), orig->nd_noex)); + if (singleton) { + rb_funcall(singleton, singleton_added, 1, ID2SYM(name)); + } + else { + rb_funcall(klass, added, 1, ID2SYM(name)); + } +} + +/* + * call-seq: + * alias_method(new_name, 
old_name) => self + * + * Makes <i>new_name</i> a new copy of the method <i>old_name</i>. This can + * be used to retain access to methods that are overridden. + * + * module Mod + * alias_method :orig_exit, :exit + * def exit(code=0) + * puts "Exiting with code #{code}" + * orig_exit(code) + * end + * end + * include Mod + * exit(99) + * + * <em>produces:</em> + * + * Exiting with code 99 + */ + +static VALUE +rb_mod_alias_method(mod, newname, oldname) + VALUE mod, newname, oldname; +{ + rb_alias(mod, rb_to_id(newname), rb_to_id(oldname)); + return mod; +} + +static NODE* +copy_node_scope(node, rval) + NODE *node; + NODE *rval; +{ + NODE *copy = NEW_NODE(NODE_SCOPE,0,rval,node->nd_next); + + if (node->nd_tbl) { + copy->nd_tbl = ALLOC_N(ID, node->nd_tbl[0]+1); + MEMCPY(copy->nd_tbl, node->nd_tbl, ID, node->nd_tbl[0]+1); + } + else { + copy->nd_tbl = 0; + } + return copy; +} + +#ifdef C_ALLOCA +# define TMP_PROTECT NODE * volatile tmp__protect_tmp=0 +# define TMP_ALLOC(n) \ + (tmp__protect_tmp = rb_node_newnode(NODE_ALLOCA, \ + ALLOC_N(VALUE,n),tmp__protect_tmp,n), \ + (void*)tmp__protect_tmp->nd_head) +#else +# define TMP_PROTECT typedef int foobazzz +# define TMP_ALLOC(n) ALLOCA_N(VALUE,n) +#endif + +#define SETUP_ARGS0(anode,alen) do {\ + NODE *n = anode;\ + if (!n) {\ + argc = 0;\ + argv = 0;\ + }\ + else if (nd_type(n) == NODE_ARRAY) {\ + argc=alen;\ + if (argc > 0) {\ + int i;\ + n = anode;\ + argv = TMP_ALLOC(argc);\ + for (i=0;i<argc;i++) {\ + argv[i] = rb_eval(self,n->nd_head);\ + n=n->nd_next;\ + }\ + }\ + else {\ + argc = 0;\ + argv = 0;\ + }\ + }\ + else {\ + VALUE args = rb_eval(self,n);\ + if (TYPE(args) != T_ARRAY)\ + args = rb_ary_to_ary(args);\ + argc = RARRAY(args)->len;\ + argv = ALLOCA_N(VALUE, argc);\ + MEMCPY(argv, RARRAY(args)->ptr, VALUE, argc);\ + }\ +} while (0) + +#define SETUP_ARGS(anode) SETUP_ARGS0(anode, anode->nd_alen) + +#define BEGIN_CALLARGS do {\ + struct BLOCK *tmp_block = ruby_block;\ + int tmp_iter = ruby_iter->iter;\ + if 
(tmp_iter == ITER_PRE) {\ + ruby_block = ruby_block->outer;\ + tmp_iter = ITER_NOT;\ + }\ + PUSH_ITER(tmp_iter) + +#define END_CALLARGS \ + ruby_block = tmp_block;\ + POP_ITER();\ +} while (0) + +#define MATCH_DATA *rb_svar(node->nd_cnt) + +static const char* is_defined _((VALUE, NODE*, char*, int)); + +static char* +arg_defined(self, node, buf, type) + VALUE self; + NODE *node; + char *buf; + char *type; +{ + int argc; + int i; + + if (!node) return type; /* no args */ + if (nd_type(node) == NODE_ARRAY) { + argc=node->nd_alen; + if (argc > 0) { + for (i=0;i<argc;i++) { + if (!is_defined(self, node->nd_head, buf, 0)) + return 0; + node = node->nd_next; + } + } + } + else if (!is_defined(self, node, buf, 0)) { + return 0; + } + return type; +} + +static const char* +is_defined(self, node, buf, noeval) + VALUE self; + NODE *node; /* OK */ + char *buf; + int noeval; +{ + VALUE val; /* OK */ + int state; + static const char *ex = "expression"; + + if (!node) return ex; + switch (nd_type(node)) { + case NODE_SUPER: + case NODE_ZSUPER: + if (ruby_frame->this_func == 0) return 0; + else if (ruby_frame->this_class == 0) return 0; + val = ruby_frame->this_class; + if (rb_method_boundp(RCLASS(val)->super, ruby_frame->this_func, 0)) { + if (nd_type(node) == NODE_SUPER) { + return arg_defined(self, node->nd_args, buf, "super"); + } + return "super"; + } + break; + + case NODE_VCALL: + case NODE_FCALL: + val = self; + goto check_bound; + + case NODE_ATTRASGN: + val = self; + if (node->nd_recv == (NODE *)1) goto check_bound; + case NODE_CALL: + if (!is_defined(self, node->nd_recv, buf, Qtrue)) return 0; + if (noeval) return ex; + val = rb_eval(self, node->nd_recv); + check_bound: + { + int call = nd_type(node)==NODE_CALL; + + val = CLASS_OF(val); + if (call) { + int noex; + ID id = node->nd_mid; + + if (!rb_get_method_body(&val, &id, &noex)) + break; + if ((noex & NOEX_PRIVATE)) + break; + if ((noex & NOEX_PROTECTED) && + !rb_obj_is_kind_of(self, rb_class_real(val))) + break; + 
} + else if (!rb_method_boundp(val, node->nd_mid, call)) + break; + return arg_defined(self, node->nd_args, buf, + nd_type(node) == NODE_ATTRASGN ? + "assignment" : "method"); + } + break; + + case NODE_MATCH2: + case NODE_MATCH3: + return "method"; + + case NODE_YIELD: + if (rb_block_given_p()) { + return "yield"; + } + break; + + case NODE_SELF: + return "self"; + + case NODE_NIL: + return "nil"; + + case NODE_TRUE: + return "true"; + + case NODE_FALSE: + return "false"; + + case NODE_ATTRSET: + case NODE_OP_ASGN1: + case NODE_OP_ASGN2: + case NODE_MASGN: + case NODE_LASGN: + case NODE_DASGN: + case NODE_DASGN_CURR: + case NODE_GASGN: + case NODE_IASGN: + case NODE_CDECL: + case NODE_CVDECL: + case NODE_CVASGN: + return "assignment"; + + case NODE_LVAR: + return "local-variable"; + case NODE_DVAR: + return "local-variable(in-block)"; + + case NODE_GVAR: + if (rb_gvar_defined(node->nd_entry)) { + return "global-variable"; + } + break; + + case NODE_IVAR: + if (rb_ivar_defined(self, node->nd_vid)) { + return "instance-variable"; + } + break; + + case NODE_CONST: + if (ev_const_defined(ruby_cref, node->nd_vid, self)) { + return "constant"; + } + break; + + case NODE_CVAR: + if (rb_cvar_defined(cvar_cbase(), node->nd_vid)) { + return "class variable"; + } + break; + + case NODE_COLON2: + if (!is_defined(self, node->nd_recv, buf, Qtrue)) return 0; + if (noeval) return ex; + val = rb_eval(self, node->nd_recv); + switch (TYPE(val)) { + case T_CLASS: + case T_MODULE: + if (rb_const_defined_from(val, node->nd_mid)) + return "constant"; + break; + default: + if (rb_method_boundp(CLASS_OF(val), node->nd_mid, 1)) { + return "method"; + } + } + break; + + case NODE_COLON3: + if (rb_const_defined_from(rb_cObject, node->nd_mid)) { + return "constant"; + } + break; + + case NODE_NTH_REF: + if (RTEST(rb_reg_nth_defined(node->nd_nth, MATCH_DATA))) { + if (!buf) return ex; + sprintf(buf, "$%d", (int)node->nd_nth); + return buf; + } + break; + + case NODE_BACK_REF: + if 
(RTEST(rb_reg_nth_defined(0, MATCH_DATA))) { + if (!buf) return ex; + sprintf(buf, "$%c", (char)node->nd_nth); + return buf; + } + break; + + default: + PUSH_TAG(PROT_NONE); + if ((state = EXEC_TAG()) == 0) { + rb_eval(self, node); + } + POP_TAG(); + if (!state) { + return ex; + } + ruby_errinfo = Qnil; + break; + } + return 0; +} + +static int handle_rescue _((VALUE,NODE*)); + +static void blk_free(); + +static VALUE +rb_obj_is_proc(proc) + VALUE proc; +{ + if (TYPE(proc) == T_DATA && RDATA(proc)->dfree == (RUBY_DATA_FUNC)blk_free) { + return Qtrue; + } + return Qfalse; +} + +void +rb_add_event_hook(func, events) + rb_event_hook_func_t func; + rb_event_t events; +{ + rb_event_hook_t *hook; + + hook = ALLOC(rb_event_hook_t); + hook->func = func; + hook->events = events; + hook->next = event_hooks; + event_hooks = hook; +} + +int +rb_remove_event_hook(func) + rb_event_hook_func_t func; +{ + rb_event_hook_t *prev, *hook; + + prev = NULL; + hook = event_hooks; + while (hook) { + if (hook->func == func) { + if (prev) { + prev->next = hook->next; + } + else { + event_hooks = hook->next; + } + xfree(hook); + return 0; + } + prev = hook; + hook = hook->next; + } + return -1; +} + +/* + * call-seq: + * set_trace_func(proc) => proc + * set_trace_func(nil) => nil + * + * Establishes _proc_ as the handler for tracing, or disables + * tracing if the parameter is +nil+. _proc_ takes up + * to six parameters: an event name, a filename, a line number, an + * object id, a binding, and the name of a class. _proc_ is + * invoked whenever an event occurs. 
Events are: <code>c-call</code> + * (call a C-language routine), <code>c-return</code> (return from a + * C-language routine), <code>call</code> (call a Ruby method), + * <code>class</code> (start a class or module definition), + * <code>end</code> (finish a class or module definition), + * <code>line</code> (execute code on a new line), <code>raise</code> + * (raise an exception), and <code>return</code> (return from a Ruby + * method). Tracing is disabled within the context of _proc_. + * + * class Test + * def test + * a = 1 + * b = 2 + * end + * end + * + * set_trace_func proc { |event, file, line, id, binding, classname| + * printf "%8s %s:%-2d %10s %8s\n", event, file, line, id, classname + * } + * t = Test.new + * t.test + * + * line prog.rb:11 false + * c-call prog.rb:11 new Class + * c-call prog.rb:11 initialize Object + * c-return prog.rb:11 initialize Object + * c-return prog.rb:11 new Class + * line prog.rb:12 false + * call prog.rb:2 test Test + * line prog.rb:3 test Test + * line prog.rb:4 test Test + * return prog.rb:4 test Test + */ + + +static VALUE +set_trace_func(obj, trace) + VALUE obj, trace; +{ + rb_event_hook_t *hook; + + if (NIL_P(trace)) { + trace_func = 0; + rb_remove_event_hook(call_trace_func); + return Qnil; + } + if (!rb_obj_is_proc(trace)) { + rb_raise(rb_eTypeError, "trace_func needs to be Proc"); + } + trace_func = trace; + for (hook = event_hooks; hook; hook = hook->next) { + if (hook->func == call_trace_func) + return trace; + } + rb_add_event_hook(call_trace_func, RUBY_EVENT_ALL); + return trace; +} + +static char * +get_event_name(rb_event_t event) +{ + switch (event) { + case RUBY_EVENT_LINE: + return "line"; + case RUBY_EVENT_CLASS: + return "class"; + case RUBY_EVENT_END: + return "end"; + case RUBY_EVENT_CALL: + return "call"; + case RUBY_EVENT_RETURN: + return "return"; + case RUBY_EVENT_C_CALL: + return "c-call"; + case RUBY_EVENT_C_RETURN: + return "c-return"; + case RUBY_EVENT_RAISE: + return "raise"; + default: + return 
"unknown"; + } +} + +static void +call_trace_func(event, node, self, id, klass) + rb_event_t event; + NODE *node; + VALUE self; + ID id; + VALUE klass; /* OK */ +{ + int state, raised; + struct FRAME *prev; + NODE *node_save; + VALUE srcfile; + char *event_name; + + if (!trace_func) return; + if (tracing) return; + if (id == ID_ALLOCATOR) return; + if (!node && ruby_sourceline == 0) return; + + if (!(node_save = ruby_current_node)) { + node_save = NEW_BEGIN(0); + } + tracing = 1; + prev = ruby_frame; + PUSH_FRAME(); + *ruby_frame = *prev; + ruby_frame->prev = prev; + ruby_frame->iter = 0; /* blocks not available anyway */ + + if (node) { + ruby_current_node = node; + ruby_frame->node = node; + ruby_sourcefile = node->nd_file; + ruby_sourceline = nd_line(node); + } + if (klass) { + if (TYPE(klass) == T_ICLASS) { + klass = RBASIC(klass)->klass; + } + else if (FL_TEST(klass, FL_SINGLETON)) { + klass = self; + } + } + PUSH_TAG(PROT_NONE); + raised = thread_reset_raised(); + if ((state = EXEC_TAG()) == 0) { + srcfile = rb_str_new2(ruby_sourcefile?ruby_sourcefile:"(ruby)"); + event_name = get_event_name(event); + proc_invoke(trace_func, rb_ary_new3(6, rb_str_new2(event_name), + srcfile, + INT2FIX(ruby_sourceline), + id?ID2SYM(id):Qnil, + self ? 
rb_f_binding(self) : Qnil, + klass?klass:Qnil), + Qundef, 0); + } + if (raised) thread_set_raised(); + POP_TAG(); + POP_FRAME(); + + tracing = 0; + ruby_current_node = node_save; + SET_CURRENT_SOURCE(); + if (state) JUMP_TAG(state); +} + +static VALUE +avalue_to_svalue(v) + VALUE v; +{ + VALUE tmp, top; + + tmp = rb_check_array_type(v); + if (NIL_P(tmp)) { + return v; + } + if (RARRAY(tmp)->len == 0) { + return Qundef; + } + if (RARRAY(tmp)->len == 1) { + top = rb_check_array_type(RARRAY(tmp)->ptr[0]); + if (NIL_P(top)) { + return RARRAY(tmp)->ptr[0]; + } + if (RARRAY(top)->len > 1) { + return v; + } + return top; + } + return tmp; +} + +static VALUE +svalue_to_avalue(v) + VALUE v; +{ + VALUE tmp, top; + + if (v == Qundef) return rb_ary_new2(0); + tmp = rb_check_array_type(v); + if (NIL_P(tmp)) { + return rb_ary_new3(1, v); + } + if (RARRAY(tmp)->len == 1) { + top = rb_check_array_type(RARRAY(tmp)->ptr[0]); + if (!NIL_P(top) && RARRAY(top)->len > 1) { + return tmp; + } + return rb_ary_new3(1, v); + } + return tmp; +} + +static VALUE +svalue_to_mrhs(v, lhs) + VALUE v; + NODE *lhs; +{ + VALUE tmp; + + if (v == Qundef) return rb_values_new2(0, 0); + tmp = rb_check_array_type(v); + if (NIL_P(tmp)) { + return rb_values_new(1, v); + } + /* no lhs means splat lhs only */ + if (!lhs) { + return rb_values_new(1, v); + } + return tmp; +} + +static VALUE +avalue_splat(v) + VALUE v; +{ + if (RARRAY(v)->len == 0) { + return Qundef; + } + if (RARRAY(v)->len == 1) { + return RARRAY(v)->ptr[0]; + } + return v; +} + +static VALUE +splat_value(v) + VALUE v; +{ + VALUE val; + + if (NIL_P(v)) val = rb_ary_new3(1, Qnil); + else val = rb_Array(v); + return rb_values_from_ary(val); +} + +static VALUE +class_prefix(self, cpath) + VALUE self; + NODE *cpath; +{ + if (!cpath) { + rb_bug("class path missing"); + } + if (cpath->nd_head) { + VALUE c = rb_eval(self, cpath->nd_head); + switch (TYPE(c)) { + case T_CLASS: + case T_MODULE: + break; + default: + rb_raise(rb_eTypeError, "%s is not a 
class/module", + RSTRING(rb_obj_as_string(c))->ptr); + } + return c; + } + else if (nd_type(cpath) == NODE_COLON2) { + return ruby_cbase; + } + else if (ruby_wrapper) { + return ruby_wrapper; + } + else { + return rb_cObject; + } +} + +#define return_value(v) do {\ + if ((prot_tag->retval = (v)) == Qundef) {\ + prot_tag->retval = Qnil;\ + }\ +} while (0) + +NORETURN(static void return_jump _((VALUE))); +NORETURN(static void break_jump _((VALUE))); + +static VALUE +rb_eval(self, n) + VALUE self; + NODE *n; +{ + NODE * volatile contnode = 0; + NODE * volatile node = n; + int state; + volatile VALUE result = Qnil; + +#define RETURN(v) do { \ + result = (v); \ + goto finish; \ +} while (0) + + again: + if (!node) RETURN(Qnil); + + ruby_current_node = node; + if (node->flags & NODE_NEWLINE) { + EXEC_EVENT_HOOK(RUBY_EVENT_LINE, node, self, + ruby_frame->this_func, + ruby_frame->this_class); + } + switch (nd_type(node)) { + case NODE_BLOCK: + if (contnode) { + result = rb_eval(self, node); + break; + } + contnode = node->nd_next; + node = node->nd_head; + goto again; + + case NODE_POSTEXE: + rb_f_END(); + nd_set_type(node, NODE_NIL); /* exec just once */ + result = Qnil; + break; + + /* begin .. 
end without clauses */ + case NODE_BEGIN: + node = node->nd_body; + goto again; + + /* nodes for speed-up(default match) */ + case NODE_MATCH: + result = rb_reg_match2(node->nd_lit); + break; + + /* nodes for speed-up(literal match) */ + case NODE_MATCH2: + { + VALUE l = rb_eval(self,node->nd_recv); + VALUE r = rb_eval(self,node->nd_value); + result = rb_reg_match(l, r); + } + break; + + /* nodes for speed-up(literal match) */ + case NODE_MATCH3: + { + VALUE r = rb_eval(self,node->nd_recv); + VALUE l = rb_eval(self,node->nd_value); + if (TYPE(l) == T_STRING) { + result = rb_reg_match(r, l); + } + else { + result = rb_funcall(l, match, 1, r); + } + } + break; + + /* node for speed-up(top-level loop for -n/-p) */ + case NODE_OPT_N: + PUSH_TAG(PROT_LOOP); + switch (state = EXEC_TAG()) { + case 0: + opt_n_next: + while (!NIL_P(rb_gets())) { + opt_n_redo: + rb_eval(self, node->nd_body); + } + break; + + case TAG_REDO: + state = 0; + goto opt_n_redo; + case TAG_NEXT: + state = 0; + goto opt_n_next; + case TAG_BREAK: + state = 0; + default: + break; + } + POP_TAG(); + if (state) JUMP_TAG(state); + RETURN(Qnil); + + case NODE_SELF: + RETURN(self); + + case NODE_NIL: + RETURN(Qnil); + + case NODE_TRUE: + RETURN(Qtrue); + + case NODE_FALSE: + RETURN(Qfalse); + + case NODE_ERRINFO: + RETURN(ruby_errinfo); + + case NODE_IF: + EXEC_EVENT_HOOK(RUBY_EVENT_LINE, node, self, + ruby_frame->this_func, + ruby_frame->this_class); + if (RTEST(rb_eval(self, node->nd_cond))) { + node = node->nd_body; + } + else { + node = node->nd_else; + } + goto again; + + case NODE_WHEN: + while (node) { + NODE *tag; + + if (nd_type(node) != NODE_WHEN) goto again; + tag = node->nd_head; + while (tag) { + EXEC_EVENT_HOOK(RUBY_EVENT_LINE, tag, self, + ruby_frame->this_func, + ruby_frame->this_class); + if (tag->nd_head && nd_type(tag->nd_head) == NODE_WHEN) { + VALUE v = rb_eval(self, tag->nd_head->nd_head); + long i; + + if (TYPE(v) != T_ARRAY) v = rb_ary_to_ary(v); + for (i=0; i<RARRAY(v)->len; i++) { 
+ if (RTEST(RARRAY(v)->ptr[i])) { + node = node->nd_body; + goto again; + } + } + tag = tag->nd_next; + continue; + } + if (RTEST(rb_eval(self, tag->nd_head))) { + node = node->nd_body; + goto again; + } + tag = tag->nd_next; + } + node = node->nd_next; + } + RETURN(Qnil); + + case NODE_CASE: + { + VALUE val; + + val = rb_eval(self, node->nd_head); + node = node->nd_body; + while (node) { + NODE *tag; + + if (nd_type(node) != NODE_WHEN) { + goto again; + } + tag = node->nd_head; + while (tag) { + EXEC_EVENT_HOOK(RUBY_EVENT_LINE, tag, self, + ruby_frame->this_func, + ruby_frame->this_class); + if (tag->nd_head && nd_type(tag->nd_head) == NODE_WHEN) { + VALUE v = rb_eval(self, tag->nd_head->nd_head); + long i; + + if (TYPE(v) != T_ARRAY) v = rb_ary_to_ary(v); + for (i=0; i<RARRAY(v)->len; i++) { + if (RTEST(rb_funcall2(RARRAY(v)->ptr[i], eqq, 1, &val))){ + node = node->nd_body; + goto again; + } + } + tag = tag->nd_next; + continue; + } + if (RTEST(rb_funcall2(rb_eval(self, tag->nd_head), eqq, 1, &val))) { + node = node->nd_body; + goto again; + } + tag = tag->nd_next; + } + node = node->nd_next; + } + } + RETURN(Qnil); + + case NODE_WHILE: + PUSH_TAG(PROT_LOOP); + result = Qnil; + switch (state = EXEC_TAG()) { + case 0: + if (node->nd_state && !RTEST(rb_eval(self, node->nd_cond))) + goto while_out; + do { + while_redo: + rb_eval(self, node->nd_body); + while_next: + ; + } while (RTEST(rb_eval(self, node->nd_cond))); + break; + + case TAG_REDO: + state = 0; + goto while_redo; + case TAG_NEXT: + state = 0; + goto while_next; + case TAG_BREAK: + if (TAG_DST()) { + state = 0; + result = prot_tag->retval; + } + /* fall through */ + default: + break; + } + while_out: + POP_TAG(); + if (state) JUMP_TAG(state); + RETURN(result); + + case NODE_UNTIL: + PUSH_TAG(PROT_LOOP); + result = Qnil; + switch (state = EXEC_TAG()) { + case 0: + if (node->nd_state && RTEST(rb_eval(self, node->nd_cond))) + goto until_out; + do { + until_redo: + rb_eval(self, node->nd_body); + until_next: 
+ ; + } while (!RTEST(rb_eval(self, node->nd_cond))); + break; + + case TAG_REDO: + state = 0; + goto until_redo; + case TAG_NEXT: + state = 0; + goto until_next; + case TAG_BREAK: + if (TAG_DST()) { + state = 0; + result = prot_tag->retval; + } + /* fall through */ + default: + break; + } + until_out: + POP_TAG(); + if (state) JUMP_TAG(state); + RETURN(result); + + case NODE_BLOCK_PASS: + result = block_pass(self, node); + break; + + case NODE_ITER: + case NODE_FOR: + case NODE_LAMBDA: + { + PUSH_TAG(PROT_LOOP); + PUSH_BLOCK(node->nd_var, node->nd_body); + + state = EXEC_TAG(); + if (state == 0) { + iter_retry: + PUSH_ITER(ITER_PRE); + if (nd_type(node) == NODE_ITER) { + result = rb_eval(self, node->nd_iter); + } + else if (nd_type(node) == NODE_LAMBDA) { + ruby_iter->iter = ruby_frame->iter = ITER_CUR; + result = rb_block_proc(); + } + else { + VALUE recv; + + _block.flags &= ~BLOCK_D_SCOPE; + BEGIN_CALLARGS; + recv = rb_eval(self, node->nd_iter); + END_CALLARGS; + ruby_current_node = node; + SET_CURRENT_SOURCE(); + result = rb_call(CLASS_OF(recv),recv,each,0,0,0); + } + POP_ITER(); + } + else if (state == TAG_BREAK && TAG_DST()) { + result = prot_tag->retval; + state = 0; + } + else if (state == TAG_RETRY && ruby_block == &_block) { + state = 0; + goto iter_retry; + } + POP_BLOCK(); + POP_TAG(); + switch (state) { + case 0: + break; + default: + JUMP_TAG(state); + } + } + break; + + case NODE_BREAK: + break_jump(rb_eval(self, node->nd_stts)); + break; + + case NODE_NEXT: + CHECK_INTS; + return_value(rb_eval(self, node->nd_stts)); + JUMP_TAG(TAG_NEXT); + break; + + case NODE_REDO: + CHECK_INTS; + JUMP_TAG(TAG_REDO); + break; + + case NODE_RETRY: + CHECK_INTS; + JUMP_TAG(TAG_RETRY); + break; + + case NODE_SPLAT: + result = splat_value(rb_eval(self, node->nd_head)); + break; + + case NODE_TO_ARY: + result = rb_ary_to_ary(rb_eval(self, node->nd_head)); + break; + + case NODE_SVALUE: + result = avalue_splat(rb_eval(self, node->nd_head)); + if (result == Qundef) 
result = Qnil; + break; + + case NODE_YIELD: + if (node->nd_head) { + result = rb_eval(self, node->nd_head); + ruby_current_node = node; + } + else { + result = Qundef; /* no arg */ + } + SET_CURRENT_SOURCE(); + result = rb_yield_0(result, 0, 0, 0, node->nd_state); + break; + + case NODE_RESCUE: + { + volatile VALUE e_info = ruby_errinfo; + volatile int rescuing = 0; + + PUSH_TAG(PROT_NONE); + if ((state = EXEC_TAG()) == 0) { + retry_entry: + result = rb_eval(self, node->nd_head); + } + else if (rescuing) { + if (rescuing < 0) { + /* in rescue argument, just reraise */ + } + else if (state == TAG_RETRY) { + rescuing = state = 0; + ruby_errinfo = e_info; + goto retry_entry; + } + else if (state != TAG_RAISE) { + result = prot_tag->retval; + } + } + else if (state == TAG_RAISE) { + NODE *resq = node->nd_resq; + + rescuing = -1; + while (resq) { + ruby_current_node = resq; + if (handle_rescue(self, resq)) { + state = 0; + rescuing = 1; + result = rb_eval(self, resq->nd_body); + break; + } + resq = resq->nd_head; /* next rescue */ + } + } + else { + result = prot_tag->retval; + } + POP_TAG(); + if (state != TAG_RAISE) ruby_errinfo = e_info; + if (state) { + if (state == TAG_NEXT) prot_tag->retval = result; + JUMP_TAG(state); + } + /* no exception raised */ + if (!rescuing && (node = node->nd_else)) { /* else clause given */ + goto again; + } + } + break; + + case NODE_ENSURE: + PUSH_TAG(PROT_NONE); + if ((state = EXEC_TAG()) == 0) { + result = rb_eval(self, node->nd_head); + } + POP_TAG(); + if (node->nd_ensr) { + VALUE retval = prot_tag->retval; /* save retval */ + VALUE errinfo = ruby_errinfo; + + rb_eval(self, node->nd_ensr); + return_value(retval); + ruby_errinfo = errinfo; + } + if (state) JUMP_TAG(state); + break; + + case NODE_AND: + result = rb_eval(self, node->nd_1st); + if (!RTEST(result)) break; + node = node->nd_2nd; + goto again; + + case NODE_OR: + result = rb_eval(self, node->nd_1st); + if (RTEST(result)) break; + node = node->nd_2nd; + goto again; + + 
case NODE_NOT: + if (RTEST(rb_eval(self, node->nd_body))) result = Qfalse; + else result = Qtrue; + break; + + case NODE_DOT2: + case NODE_DOT3: + result = rb_range_new(rb_eval(self, node->nd_beg), + rb_eval(self, node->nd_end), + nd_type(node) == NODE_DOT3); + break; + + case NODE_FLIP2: /* like AWK */ + { + VALUE *flip = rb_svar(node->nd_cnt); + if (!flip) rb_bug("unexpected local variable"); + if (!RTEST(*flip)) { + if (RTEST(rb_eval(self, node->nd_beg))) { + *flip = RTEST(rb_eval(self, node->nd_end))?Qfalse:Qtrue; + result = Qtrue; + } + else { + result = Qfalse; + } + } + else { + if (RTEST(rb_eval(self, node->nd_end))) { + *flip = Qfalse; + } + result = Qtrue; + } + } + break; + + case NODE_FLIP3: /* like SED */ + { + VALUE *flip = rb_svar(node->nd_cnt); + if (!flip) rb_bug("unexpected local variable"); + if (!RTEST(*flip)) { + result = RTEST(rb_eval(self, node->nd_beg)) ? Qtrue : Qfalse; + *flip = result; + } + else { + if (RTEST(rb_eval(self, node->nd_end))) { + *flip = Qfalse; + } + result = Qtrue; + } + } + break; + + case NODE_RETURN: + return_jump(rb_eval(self, node->nd_stts)); + break; + + case NODE_ARGSCAT: + { + VALUE args = rb_eval(self, node->nd_head); + result = rb_ary_concat(args, splat_value(rb_eval(self, node->nd_body))); + } + break; + + case NODE_ARGSPUSH: + { + VALUE args = rb_ary_dup(rb_eval(self, node->nd_head)); + result = rb_ary_push(args, rb_eval(self, node->nd_body)); + } + break; + + case NODE_ATTRASGN: + { + VALUE recv; + int argc; VALUE *argv; /* used in SETUP_ARGS */ + int scope; + TMP_PROTECT; + + BEGIN_CALLARGS; + if (node->nd_recv == (NODE *)1) { + recv = self; + scope = 1; + } + else { + recv = rb_eval(self, node->nd_recv); + scope = 0; + } + SETUP_ARGS(node->nd_args); + END_CALLARGS; + + ruby_current_node = node; + SET_CURRENT_SOURCE(); + rb_call(CLASS_OF(recv),recv,node->nd_mid,argc,argv,scope); + result = argv[argc-1]; + } + break; + + case NODE_CALL: + { + VALUE recv; + int argc; VALUE *argv; /* used in SETUP_ARGS */ + 
TMP_PROTECT; + + BEGIN_CALLARGS; + recv = rb_eval(self, node->nd_recv); + SETUP_ARGS(node->nd_args); + END_CALLARGS; + + ruby_current_node = node; + SET_CURRENT_SOURCE(); + result = rb_call(CLASS_OF(recv),recv,node->nd_mid,argc,argv,0); + } + break; + + case NODE_FCALL: + { + int argc; VALUE *argv; /* used in SETUP_ARGS */ + TMP_PROTECT; + + BEGIN_CALLARGS; + SETUP_ARGS(node->nd_args); + END_CALLARGS; + + ruby_current_node = node; + SET_CURRENT_SOURCE(); + result = rb_call(CLASS_OF(self),self,node->nd_mid,argc,argv,1); + } + break; + + case NODE_VCALL: + SET_CURRENT_SOURCE(); + result = rb_call(CLASS_OF(self),self,node->nd_mid,0,0,2); + break; + + case NODE_SUPER: + case NODE_ZSUPER: + { + int argc; VALUE *argv; /* used in SETUP_ARGS */ + TMP_PROTECT; + + if (ruby_frame->this_class == 0) { + if (ruby_frame->this_func) { + rb_name_error(ruby_frame->callee, + "superclass method `%s' disabled", + rb_id2name(ruby_frame->this_func)); + } + else { + rb_raise(rb_eNoMethodError, "super called outside of method"); + } + } + if (nd_type(node) == NODE_ZSUPER) { + argc = ruby_frame->argc; + if (argc && ruby_frame->prev && + (ruby_frame->prev->flags & FRAME_DMETH)) { + if (TYPE(RBASIC(ruby_scope)->klass) != T_ARRAY || + RARRAY(RBASIC(ruby_scope)->klass)->len != argc) { + rb_raise(rb_eRuntimeError, + "super: specify arguments explicitly"); + } + argv = RARRAY(RBASIC(ruby_scope)->klass)->ptr; + } + else { + argv = ruby_scope->local_vars + 2; + } + } + else { + BEGIN_CALLARGS; + SETUP_ARGS(node->nd_args); + END_CALLARGS; + ruby_current_node = node; + } + + SET_CURRENT_SOURCE(); + result = rb_call_super(argc, argv); + } + break; + + case NODE_SCOPE: + { + struct FRAME frame; + NODE *saved_cref = 0; + + frame = *ruby_frame; + frame.tmp = ruby_frame; + ruby_frame = &frame; + + PUSH_SCOPE(); + PUSH_TAG(PROT_NONE); + if (node->nd_rval) { + saved_cref = ruby_cref; + ruby_cref = (NODE*)node->nd_rval; + } + if (node->nd_tbl) { + VALUE *vars = ALLOCA_N(VALUE, node->nd_tbl[0]+1); + *vars++ 
= (VALUE)node; + ruby_scope->local_vars = vars; + rb_mem_clear(ruby_scope->local_vars, node->nd_tbl[0]); + ruby_scope->local_tbl = node->nd_tbl; + } + else { + ruby_scope->local_vars = 0; + ruby_scope->local_tbl = 0; + } + if ((state = EXEC_TAG()) == 0) { + result = rb_eval(self, node->nd_next); + } + POP_TAG(); + POP_SCOPE(); + ruby_frame = frame.tmp; + if (saved_cref) + ruby_cref = saved_cref; + if (state) JUMP_TAG(state); + } + break; + + case NODE_OP_ASGN1: + { + int argc; VALUE *argv; /* used in SETUP_ARGS */ + VALUE recv, val; + NODE *rval; + TMP_PROTECT; + + recv = rb_eval(self, node->nd_recv); + rval = node->nd_args->nd_head; + SETUP_ARGS0(node->nd_args->nd_next, node->nd_args->nd_alen - 1); + val = rb_funcall2(recv, aref, argc-1, argv); + switch (node->nd_mid) { + case 0: /* OR */ + if (RTEST(val)) RETURN(val); + val = rb_eval(self, rval); + break; + case 1: /* AND */ + if (!RTEST(val)) RETURN(val); + val = rb_eval(self, rval); + break; + default: + val = rb_funcall(val, node->nd_mid, 1, rb_eval(self, rval)); + } + argv[argc-1] = val; + rb_funcall2(recv, aset, argc, argv); + result = val; + } + break; + + case NODE_OP_ASGN2: + { + ID id = node->nd_next->nd_vid; + VALUE recv, val; + + recv = rb_eval(self, node->nd_recv); + val = rb_funcall(recv, id, 0); + switch (node->nd_next->nd_mid) { + case 0: /* OR */ + if (RTEST(val)) RETURN(val); + val = rb_eval(self, node->nd_value); + break; + case 1: /* AND */ + if (!RTEST(val)) RETURN(val); + val = rb_eval(self, node->nd_value); + break; + default: + val = rb_funcall(val, node->nd_next->nd_mid, 1, + rb_eval(self, node->nd_value)); + } + + rb_funcall2(recv, node->nd_next->nd_aid, 1, &val); + result = val; + } + break; + + case NODE_OP_ASGN_AND: + result = rb_eval(self, node->nd_head); + if (!RTEST(result)) break; + node = node->nd_value; + goto again; + + case NODE_OP_ASGN_OR: + if ((node->nd_aid && !is_defined(self, node->nd_head, 0, 0)) || + !RTEST(result = rb_eval(self, node->nd_head))) { + node = 
node->nd_value; + goto again; + } + break; + + case NODE_MASGN: + result = massign(self, node, rb_eval(self, node->nd_value), 0); + break; + + case NODE_LASGN: + if (ruby_scope->local_vars == 0) + rb_bug("unexpected local variable assignment"); + result = rb_eval(self, node->nd_value); + ruby_scope->local_vars[node->nd_cnt] = result; + break; + + case NODE_DASGN: + result = rb_eval(self, node->nd_value); + dvar_asgn(node->nd_vid, result); + break; + + case NODE_DASGN_CURR: + result = rb_eval(self, node->nd_value); + dvar_asgn_curr(node->nd_vid, result); + break; + + case NODE_GASGN: + result = rb_eval(self, node->nd_value); + rb_gvar_set(node->nd_entry, result); + break; + + case NODE_IASGN: + result = rb_eval(self, node->nd_value); + rb_ivar_set(self, node->nd_vid, result); + break; + + case NODE_CDECL: + result = rb_eval(self, node->nd_value); + if (node->nd_vid == 0) { + rb_const_set(class_prefix(self, node->nd_else), node->nd_else->nd_mid, result); + } + else { + if (NIL_P(ruby_cbase)) { + rb_raise(rb_eTypeError, "no class/module to define constant"); + } + rb_const_set(ruby_cbase, node->nd_vid, result); + } + break; + + case NODE_CVDECL: + if (NIL_P(ruby_cbase)) { + rb_raise(rb_eTypeError, "no class/module to define class variable"); + } + result = rb_eval(self, node->nd_value); + rb_cvar_set(cvar_cbase(), node->nd_vid, result, Qtrue); + break; + + case NODE_CVASGN: + result = rb_eval(self, node->nd_value); + rb_cvar_set(cvar_cbase(), node->nd_vid, result, Qfalse); + break; + + case NODE_LVAR: + if (ruby_scope->local_vars == 0) { + rb_bug("unexpected local variable"); + } + result = ruby_scope->local_vars[node->nd_cnt]; + break; + + case NODE_DVAR: + result = rb_dvar_ref(node->nd_vid); + break; + + case NODE_GVAR: + result = rb_gvar_get(node->nd_entry); + break; + + case NODE_IVAR: + result = rb_ivar_get(self, node->nd_vid); + break; + + case NODE_CONST: + result = ev_const_get(ruby_cref, node->nd_vid, self); + break; + + case NODE_CVAR: + result = 
rb_cvar_get(cvar_cbase(), node->nd_vid); + break; + + case NODE_BLOCK_ARG: + if (ruby_scope->local_vars == 0) + rb_bug("unexpected block argument"); + if (rb_block_given_p()) { + result = rb_block_proc(); + ruby_scope->local_vars[node->nd_cnt] = result; + } + else { + result = Qnil; + } + break; + + case NODE_COLON2: + { + VALUE klass; + + klass = rb_eval(self, node->nd_head); + if (rb_is_const_id(node->nd_mid)) { + switch (TYPE(klass)) { + case T_CLASS: + case T_MODULE: + result = rb_const_get_from(klass, node->nd_mid); + break; + default: + rb_raise(rb_eTypeError, "%s is not a class/module", + RSTRING(rb_obj_as_string(klass))->ptr); + break; + } + } + else { + result = rb_funcall(klass, node->nd_mid, 0, 0); + } + } + break; + + case NODE_COLON3: + result = rb_const_get_from(rb_cObject, node->nd_mid); + break; + + case NODE_NTH_REF: + result = rb_reg_nth_match(node->nd_nth, MATCH_DATA); + break; + + case NODE_BACK_REF: + switch (node->nd_nth) { + case '&': + result = rb_reg_last_match(MATCH_DATA); + break; + case '`': + result = rb_reg_match_pre(MATCH_DATA); + break; + case '\'': + result = rb_reg_match_post(MATCH_DATA); + break; + case '+': + result = rb_reg_match_last(MATCH_DATA); + break; + default: + rb_bug("unexpected back-ref"); + } + break; + + case NODE_HASH: + { + NODE *list; + VALUE hash = rb_hash_new(); + VALUE key, val; + + list = node->nd_head; + while (list) { + key = rb_eval(self, list->nd_head); + list = list->nd_next; + if (list == 0) + rb_bug("odd number list for Hash"); + val = rb_eval(self, list->nd_head); + list = list->nd_next; + rb_hash_aset(hash, key, val); + } + result = hash; + } + break; + + case NODE_ZARRAY: /* zero length list */ + result = rb_ary_new(); + break; + + case NODE_ARRAY: + { + VALUE ary; + long i; + + i = node->nd_alen; + ary = rb_ary_new2(i); + for (i=0;node;node=node->nd_next) { + RARRAY(ary)->ptr[i++] = rb_eval(self, node->nd_head); + RARRAY(ary)->len = i; + } + + result = ary; + } + break; + + case NODE_VALUES: + { + 
VALUE val; + long i; + + i = node->nd_alen; + val = rb_values_new2(i, 0); + for (i=0;node;node=node->nd_next) { + RARRAY(val)->ptr[i++] = rb_eval(self, node->nd_head); + RARRAY(val)->len = i; + } + + result = val; + } + break; + + case NODE_STR: + result = rb_str_new3(node->nd_lit); + break; + + case NODE_EVSTR: + result = rb_obj_as_string(rb_eval(self, node->nd_body)); + break; + + case NODE_DSTR: + case NODE_DXSTR: + case NODE_DREGX: + case NODE_DREGX_ONCE: + case NODE_DSYM: + { + VALUE str, str2; + NODE *list = node->nd_next; + + str = rb_str_new3(node->nd_lit); + while (list) { + if (list->nd_head) { + switch (nd_type(list->nd_head)) { + case NODE_STR: + str2 = list->nd_head->nd_lit; + break; + default: + str2 = rb_eval(self, list->nd_head); + break; + } + rb_str_append(str, str2); + OBJ_INFECT(str, str2); + } + list = list->nd_next; + } + switch (nd_type(node)) { + case NODE_DREGX: + result = rb_reg_new(RSTRING(str)->ptr, RSTRING(str)->len, + node->nd_cflag); + break; + case NODE_DREGX_ONCE: /* regexp expand once */ + result = rb_reg_new(RSTRING(str)->ptr, RSTRING(str)->len, + node->nd_cflag); + nd_set_type(node, NODE_LIT); + node->nd_lit = result; + break; + case NODE_LIT: + /* other thread may replace NODE_DREGX_ONCE to NODE_LIT */ + goto again; + case NODE_DXSTR: + result = rb_funcall(self, '`', 1, str); + break; + case NODE_DSYM: + result = rb_str_intern(str); + break; + default: + result = str; + break; + } + } + break; + + case NODE_XSTR: + result = rb_funcall(self, '`', 1, rb_str_new3(node->nd_lit)); + break; + + case NODE_LIT: + result = node->nd_lit; + break; + + case NODE_DEFN: + if (node->nd_defn) { + NODE *body, *defn; + VALUE origin; + int noex; + + if (NIL_P(ruby_class)) { + rb_raise(rb_eTypeError, "no class/module to add method"); + } + if (ruby_class == rb_cObject && node->nd_mid == init) { + rb_warn("redefining Object#initialize may cause infinite loop"); + } + if (node->nd_mid == __id__ || node->nd_mid == __send__) { + rb_warn("redefining 
`%s' may cause serious problem", + rb_id2name(node->nd_mid)); + } + rb_frozen_class_p(ruby_class); + body = search_method(ruby_class, node->nd_mid, &origin); + if (body){ + if (RTEST(ruby_verbose) && ruby_class == origin && body->nd_cnt == 0 && body->nd_body) { + rb_warning("method redefined; discarding old %s", rb_id2name(node->nd_mid)); + } + } + + if (SCOPE_TEST(SCOPE_PRIVATE) || node->nd_mid == init) { + noex = NOEX_PRIVATE; + } + else if (SCOPE_TEST(SCOPE_PROTECTED)) { + noex = NOEX_PROTECTED; + } + else { + noex = NOEX_PUBLIC; + } + if (body && origin == ruby_class && body->nd_body == 0) { + noex |= NOEX_NOSUPER; + } + + defn = copy_node_scope(node->nd_defn, ruby_cref); + rb_add_method(ruby_class, node->nd_mid, defn, noex); + if (scope_vmode == SCOPE_MODFUNC) { + rb_add_method(rb_singleton_class(ruby_class), + node->nd_mid, defn, NOEX_PUBLIC); + } + result = Qnil; + } + break; + + case NODE_DEFS: + if (node->nd_defn) { + VALUE recv = rb_eval(self, node->nd_recv); + VALUE klass; + NODE *body = 0, *defn; + + if (ruby_safe_level >= 4 && !OBJ_TAINTED(recv)) { + rb_raise(rb_eSecurityError, "Insecure: can't define singleton method"); + } + if (FIXNUM_P(recv) || SYMBOL_P(recv)) { + rb_raise(rb_eTypeError, + "can't define singleton method \"%s\" for %s", + rb_id2name(node->nd_mid), + rb_obj_classname(recv)); + } + + if (OBJ_FROZEN(recv)) rb_error_frozen("object"); + klass = rb_singleton_class(recv); + if (st_lookup(RCLASS(klass)->m_tbl, node->nd_mid, (st_data_t *)&body)) { + if (ruby_safe_level >= 4) { + rb_raise(rb_eSecurityError, "redefining method prohibited"); + } + if (RTEST(ruby_verbose)) { + rb_warning("redefine %s", rb_id2name(node->nd_mid)); + } + } + defn = copy_node_scope(node->nd_defn, ruby_cref); + rb_add_method(klass, node->nd_mid, defn, + NOEX_PUBLIC|(body?body->nd_noex&NOEX_UNDEF:0)); + result = Qnil; + } + break; + + case NODE_UNDEF: + if (NIL_P(ruby_class)) { + rb_raise(rb_eTypeError, "no class to undef method"); + } + rb_undef(ruby_class, 
rb_to_id(rb_eval(self, node->u2.node))); + result = Qnil; + break; + + case NODE_ALIAS: + if (NIL_P(ruby_class)) { + rb_raise(rb_eTypeError, "no class to make alias"); + } + rb_alias(ruby_class, rb_to_id(rb_eval(self, node->u1.node)), + rb_to_id(rb_eval(self, node->u2.node))); + result = Qnil; + break; + + case NODE_VALIAS: + rb_alias_variable(node->u1.id, node->u2.id); + result = Qnil; + break; + + case NODE_CLASS: + { + VALUE super, klass, tmp, cbase; + ID cname; + int gen = Qfalse; + + cbase = class_prefix(self, node->nd_cpath); + cname = node->nd_cpath->nd_mid; + + if (NIL_P(ruby_cbase)) { + rb_raise(rb_eTypeError, "no outer class/module"); + } + if (node->nd_super) { + super = rb_eval(self, node->nd_super); + rb_check_inheritable(super); + } + else { + super = 0; + } + + if (rb_const_defined_at(cbase, cname)) { + klass = rb_const_get_at(cbase, cname); + if (TYPE(klass) != T_CLASS) { + rb_raise(rb_eTypeError, "%s is not a class", + rb_id2name(cname)); + } + if (super) { + tmp = rb_class_real(RCLASS(klass)->super); + if (tmp != super) { + rb_raise(rb_eTypeError, "superclass mismatch for class %s", + rb_id2name(cname)); + } + super = 0; + } + if (ruby_safe_level >= 4) { + rb_raise(rb_eSecurityError, "extending class prohibited"); + } + } + else { + if (!super) super = rb_cObject; + klass = rb_define_class_id(cname, super); + rb_set_class_path(klass, cbase, rb_id2name(cname)); + rb_const_set(cbase, cname, klass); + gen = Qtrue; + } + if (ruby_wrapper) { + rb_extend_object(klass, ruby_wrapper); + rb_include_module(klass, ruby_wrapper); + } + if (super && gen) { + rb_class_inherited(super, klass); + } + result = module_setup(klass, node); + } + break; + + case NODE_MODULE: + { + VALUE module, cbase; + ID cname; + + if (NIL_P(ruby_cbase)) { + rb_raise(rb_eTypeError, "no outer class/module"); + } + cbase = class_prefix(self, node->nd_cpath); + cname = node->nd_cpath->nd_mid; + if (rb_const_defined_at(cbase, cname)) { + module = rb_const_get_at(cbase, cname); + if 
(TYPE(module) != T_MODULE) { + rb_raise(rb_eTypeError, "%s is not a module", + rb_id2name(cname)); + } + if (ruby_safe_level >= 4) { + rb_raise(rb_eSecurityError, "extending module prohibited"); + } + } + else { + module = rb_define_module_id(cname); + rb_set_class_path(module, cbase, rb_id2name(cname)); + rb_const_set(cbase, cname, module); + } + if (ruby_wrapper) { + rb_extend_object(module, ruby_wrapper); + rb_include_module(module, ruby_wrapper); + } + + result = module_setup(module, node); + } + break; + + case NODE_SCLASS: + { + VALUE klass; + + result = rb_eval(self, node->nd_recv); + if (FIXNUM_P(result) || SYMBOL_P(result)) { + rb_raise(rb_eTypeError, "no singleton class for %s", + rb_obj_classname(result)); + } + if (ruby_safe_level >= 4 && !OBJ_TAINTED(result)) + rb_raise(rb_eSecurityError, "Insecure: can't extend object"); + klass = rb_singleton_class(result); + + if (ruby_wrapper) { + rb_extend_object(klass, ruby_wrapper); + rb_include_module(klass, ruby_wrapper); + } + + result = module_setup(klass, node); + } + break; + + case NODE_DEFINED: + { + char buf[20]; + const char *desc = is_defined(self, node->nd_head, buf, 0); + + if (desc) result = rb_str_new2(desc); + else result = Qnil; + } + break; + + default: + rb_bug("unknown node type %d", nd_type(node)); + } + finish: + CHECK_INTS; + if (contnode) { + node = contnode; + contnode = 0; + goto again; + } + return result; +} + /* + * Evaluates the body of a class/module definition (n->nd_body) with + * _module_ as the current class. A copy of the current frame is made + * current, then class, scope, dynamic variables and cref are pushed and + * the body is run through rb_eval with ruby_cbase as self. Everything + * is popped again before a non-zero tag state is re-thrown with + * JUMP_TAG. Returns the value produced by rb_eval. + */ +static VALUE +module_setup(module, n) + VALUE module; + NODE *n; +{ + NODE * volatile node = n->nd_body; + int state; + struct FRAME frame; + VALUE result = Qnil; /* OK */ + TMP_PROTECT; + + frame = *ruby_frame; + frame.tmp = ruby_frame; /* remembered so ruby_frame can be restored afterwards */ + ruby_frame = &frame; + + PUSH_CLASS(module); + PUSH_SCOPE(); + PUSH_VARS(); + + if (node->nd_tbl) { + VALUE *vars = TMP_ALLOC(node->nd_tbl[0]+1); + *vars++ = (VALUE)node; /* the extra leading slot keeps the node itself */ + ruby_scope->local_vars = vars; + rb_mem_clear(ruby_scope->local_vars, node->nd_tbl[0]); + ruby_scope->local_tbl = node->nd_tbl; + } + else { + ruby_scope->local_vars = 0; 
+ ruby_scope->local_tbl = 0; + } + + PUSH_CREF(module); + PUSH_TAG(PROT_NONE); + if ((state = EXEC_TAG()) == 0) { + EXEC_EVENT_HOOK(RUBY_EVENT_CLASS, n, ruby_cbase, + ruby_frame->this_func, ruby_frame->this_class); + result = rb_eval(ruby_cbase, node->nd_next); + } + POP_TAG(); + POP_CREF(); + POP_VARS(); + POP_SCOPE(); + POP_CLASS(); + + ruby_frame = frame.tmp; + EXEC_EVENT_HOOK(RUBY_EVENT_END, n, 0, ruby_frame->this_func, + ruby_frame->this_class); + if (state) JUMP_TAG(state); /* re-thrown only after everything above was popped */ + + return result; +} + +static NODE *basic_respond_to = 0; + /* + * Tells whether _obj_ responds to the method _id_. + * Fast path: when the respond_to method node found for the class of + * _obj_ is basic_respond_to and rb_method_boundp(klass, id, 0) holds, + * Qtrue is returned directly. Otherwise respond_to is called on _obj_ + * with the symbol for _id_. + * NOTE(review): that fallback returns the VALUE from rb_funcall as an + * int without a truth test -- confirm this is intended. + */ +int +rb_respond_to(obj, id) + VALUE obj; + ID id; +{ + VALUE klass = CLASS_OF(obj); + if (rb_method_node(klass, respond_to) == basic_respond_to && + rb_method_boundp(klass, id, 0)) { + return Qtrue; + } + else{ + return rb_funcall(obj, respond_to, 1, ID2SYM(id)); + } + return Qfalse; /* not reached: both branches above return */ +} + + +/* + * call-seq: + * obj.respond_to?(symbol, include_private=false) => true or false + * + * Returns +true+ if _obj_ responds to the given + * method. Private methods are included in the search only if the + * optional second parameter evaluates to +true+. + */ + +static VALUE +rb_obj_respond_to(argc, argv, obj) + int argc; + VALUE *argv; + VALUE obj; +{ + VALUE mid, priv; + ID id; + + rb_scan_args(argc, argv, "11", &mid, &priv); + id = rb_to_id(mid); /* the flag passed below is 1 unless include_private was given as true */ + if (rb_method_boundp(CLASS_OF(obj), id, !RTEST(priv))) { + return Qtrue; + } + return Qfalse; +} + +/* + * call-seq: + * mod.method_defined?(symbol) => true or false + * + * Returns +true+ if the named method is defined by + * _mod_ (or its included modules and, if _mod_ is a class, + * its ancestors). Public and protected methods are matched. + * + * module A + * def method1() end + * end + * class B + * def method2() end + * end + * class C < B + * include A + * def method3() end + * end + * + * A.method_defined? :method1 #=> true + * C.method_defined? "method1" #=> true + * C.method_defined? "method2" #=> true + * C.method_defined? "method3" #=> true + * C.method_defined? 
"method4" #=> false + */ + +static VALUE +rb_mod_method_defined(mod, mid) + VALUE mod, mid; +{ + return rb_method_boundp(mod, rb_to_id(mid), 1); /* 1: the flag rb_obj_respond_to passes when private methods are not requested */ +} + /* True when the bits of x selected by NOEX_MASK are equal to f. */ +#define VISI_CHECK(x,f) (((x)&NOEX_MASK) == (f)) + +/* + * call-seq: + * mod.public_method_defined?(symbol) => true or false + * + * Returns +true+ if the named public method is defined by + * _mod_ (or its included modules and, if _mod_ is a class, + * its ancestors). + * + * module A + * def method1() end + * end + * class B + * protected + * def method2() end + * end + * class C < B + * include A + * def method3() end + * end + * + * A.method_defined? :method1 #=> true + * C.public_method_defined? "method1" #=> true + * C.public_method_defined? "method2" #=> false + * C.method_defined? "method2" #=> true + */ + +static VALUE +rb_mod_public_method_defined(mod, mid) + VALUE mod, mid; +{ + ID id = rb_to_id(mid); + int noex; + + if (rb_get_method_body(&mod, &id, &noex)) { + if (VISI_CHECK(noex, NOEX_PUBLIC)) + return Qtrue; + } + return Qfalse; +} + +/* + * call-seq: + * mod.private_method_defined?(symbol) => true or false + * + * Returns +true+ if the named private method is defined by + * _mod_ (or its included modules and, if _mod_ is a class, + * its ancestors). + * + * module A + * def method1() end + * end + * class B + * private + * def method2() end + * end + * class C < B + * include A + * def method3() end + * end + * + * A.method_defined? :method1 #=> true + * C.private_method_defined? "method1" #=> false + * C.private_method_defined? "method2" #=> true + * C.method_defined? 
"method2" #=> false + */ + +static VALUE +rb_mod_private_method_defined(mod, mid) + VALUE mod, mid; +{ + ID id = rb_to_id(mid); + int noex; + + if (rb_get_method_body(&mod, &id, &noex)) { + if (VISI_CHECK(noex, NOEX_PRIVATE)) + return Qtrue; + } + return Qfalse; +} + +/* + * call-seq: + * mod.protected_method_defined?(symbol) => true or false + * + * Returns +true+ if the named protected method is defined + * by _mod_ (or its included modules and, if _mod_ is a + * class, its ancestors). + * + * module A + * def method1() end + * end + * class B + * protected + * def method2() end + * end + * class C < B + * include A + * def method3() end + * end + * + * A.method_defined? :method1 #=> true + * C.protected_method_defined? "method1" #=> false + * C.protected_method_defined? "method2" #=> true + * C.method_defined? "method2" #=> true + */ + +static VALUE +rb_mod_protected_method_defined(mod, mid) + VALUE mod, mid; +{ + ID id = rb_to_id(mid); + int noex; + + if (rb_get_method_body(&mod, &id, &noex)) { + if (VISI_CHECK(noex, NOEX_PROTECTED)) + return Qtrue; + } + return Qfalse; +} + /* + * Raises a SystemExit instance built from _status_ and the first + * _mlen_ bytes of _mesg_. Declared NORETURN. + */ +NORETURN(static VALUE terminate_process _((int, const char *, long))); +static VALUE +terminate_process(status, mesg, mlen) + int status; + const char *mesg; + long mlen; +{ + VALUE args[2]; + args[0] = INT2NUM(status); + args[1] = rb_str_new(mesg, mlen); + + rb_exc_raise(rb_class_new_instance(2, args, rb_eSystemExit)); +} + /* + * Exits with _status_. While prot_tag is set this is done by raising + * SystemExit through terminate_process; otherwise ruby_finalize() is + * run and the C exit() is called. + */ +void +rb_exit(status) + int status; +{ + if (prot_tag) { + terminate_process(status, "exit", 4); + } + ruby_finalize(); + exit(status); +} + + +/* + * call-seq: + * exit(integer=0) + * Kernel::exit(integer=0) + * Process::exit(integer=0) + * + * Initiates the termination of the Ruby script by raising the + * <code>SystemExit</code> exception. This exception may be caught. The + * optional parameter is used to return a status code to the invoking + * environment. 
+ * + * begin + * exit + * puts "never get here" + * rescue SystemExit + * puts "rescued a SystemExit exception" + * end + * puts "after begin block" + * + * <em>produces:</em> + * + * rescued a SystemExit exception + * after begin block + * + * Just prior to termination, Ruby executes any <code>at_exit</code> functions + * (see Kernel::at_exit) and runs any object finalizers (see + * ObjectSpace::define_finalizer). + * + * at_exit { puts "at_exit function" } + * ObjectSpace.define_finalizer("string", proc { puts "in finalizer" }) + * exit + * + * <em>produces:</em> + * + * at_exit function + * in finalizer + */ + +VALUE +rb_f_exit(argc, argv) + int argc; + VALUE *argv; +{ + VALUE status; + int istatus; + + rb_secure(4); + if (rb_scan_args(argc, argv, "01", &status) == 1) { + switch (status) { /* true and false select the C success/failure codes, anything else goes through NUM2INT */ + case Qtrue: + istatus = EXIT_SUCCESS; + break; + case Qfalse: + istatus = EXIT_FAILURE; + break; + default: + istatus = NUM2INT(status); + break; + } + } + else { + istatus = EXIT_SUCCESS; /* rb_scan_args did not report one argument */ + } + rb_exit(istatus); + return Qnil; /* not reached */ +} + + +/* + * call-seq: + * abort([msg]) + * Kernel::abort([msg]) + * Process::abort([msg]) + * + * Terminate execution immediately, effectively by calling + * <code>Kernel.exit(1)</code>. If _msg_ is given, it is written + * to STDERR prior to terminating. 
+ */ + +VALUE +rb_f_abort(argc, argv) + int argc; + VALUE *argv; +{ + rb_secure(4); + if (argc == 0) { /* no message: print ruby_errinfo if it is set, then exit with failure */ + if (!NIL_P(ruby_errinfo)) { + error_print(); + } + rb_exit(EXIT_FAILURE); + } + else { + VALUE mesg; + + rb_scan_args(argc, argv, "1", &mesg); /* mesg is not used below; the code works on argv[0] */ + StringValue(argv[0]); + rb_io_puts(argc, argv, rb_stderr); + terminate_process(EXIT_FAILURE, RSTRING(argv[0])->ptr, RSTRING(argv[0])->len); + } + return Qnil; /* not reached */ +} + /* Leaves the current iteration through break_jump with a nil value. */ +void +rb_iter_break() +{ + break_jump(Qnil); +} + /* + * Common path for raising: makes _mesg_ the current ruby_errinfo and + * jumps to the enclosing tag with _tag_. + * - If thread_set_raised() answers true, gives up with exception_error + * and TAG_FATAL. + * - A nil _mesg_ falls back to ruby_errinfo, then to a new RuntimeError. + * - When ruby_sourcefile is set and the exception has no backtrace, + * one is made and attached. + * - With ruby_debug set, a one-line description is printed (not for + * SystemExit); a failure while printing is re-thrown. + * - The RUBY_EVENT_RAISE hook runs unless _tag_ is TAG_FATAL. + * - Without any prot_tag the error is printed before jumping. + */ +NORETURN(static void rb_longjmp _((int, VALUE))); +static VALUE make_backtrace _((void)); + +static void +rb_longjmp(tag, mesg) + int tag; + VALUE mesg; +{ + VALUE at; + + if (thread_set_raised()) { + ruby_errinfo = exception_error; + JUMP_TAG(TAG_FATAL); + } + if (NIL_P(mesg)) mesg = ruby_errinfo; + if (NIL_P(mesg)) { + mesg = rb_exc_new(rb_eRuntimeError, 0, 0); + } + + ruby_set_current_source(); + if (ruby_sourcefile && !NIL_P(mesg)) { + at = get_backtrace(mesg); + if (NIL_P(at)) { + at = make_backtrace(); + set_backtrace(mesg, at); + } + } + if (!NIL_P(mesg)) { + ruby_errinfo = mesg; + } + + if (RTEST(ruby_debug) && !NIL_P(ruby_errinfo) + && !rb_obj_is_kind_of(ruby_errinfo, rb_eSystemExit)) { + VALUE e = ruby_errinfo; + int status; + + PUSH_TAG(PROT_NONE); + if ((status = EXEC_TAG()) == 0) { + e = rb_obj_as_string(e); + warn_printf("Exception `%s' at %s:%d - %s\n", + rb_obj_classname(ruby_errinfo), + ruby_sourcefile, ruby_sourceline, + RSTRING(e)->ptr); + } + POP_TAG(); + if (status == TAG_FATAL && ruby_errinfo == exception_error) { + ruby_errinfo = mesg; /* put the original exception back */ + } + else if (status) { + thread_reset_raised(); + JUMP_TAG(status); + } + } + + rb_trap_restore_mask(); + if (tag != TAG_FATAL) { + EXEC_EVENT_HOOK(RUBY_EVENT_RAISE, ruby_current_node, + ruby_frame->self, + ruby_frame->this_func, + ruby_frame->this_class); + } + if (!prot_tag) { + error_print(); + } + thread_reset_raised(); + JUMP_TAG(tag); +} + /* Raises _mesg_: rb_longjmp with TAG_RAISE. */ +void +rb_exc_raise(mesg) + VALUE mesg; +{ + rb_longjmp(TAG_RAISE, mesg); +} + +void 
+rb_exc_fatal(mesg) + VALUE mesg; +{ + rb_longjmp(TAG_FATAL, mesg); /* jumps with TAG_FATAL rather than TAG_RAISE */ +} + /* Raises Interrupt with an empty message. */ +void +rb_interrupt() +{ + rb_raise(rb_eInterrupt, ""); +} + +/* + * call-seq: + * raise + * raise(string) + * raise(exception [, string [, array]]) + * fail + * fail(string) + * fail(exception [, string [, array]]) + * + * With no arguments, raises the exception in <code>$!</code> or raises + * a <code>RuntimeError</code> if <code>$!</code> is +nil+. + * With a single +String+ argument, raises a + * +RuntimeError+ with the string as a message. Otherwise, + * the first parameter should be the name of an +Exception+ + * class (or an object that returns an +Exception+ object when sent + * an +exception+ message). The optional second parameter sets the + * message associated with the exception, and the third parameter is an + * array of callback information. Exceptions are caught by the + * +rescue+ clause of <code>begin...end</code> blocks. + * + * raise "Failed to create socket" + * raise ArgumentError, "No parameters", caller + */ + +static VALUE +rb_f_raise(argc, argv) + int argc; + VALUE *argv; +{ + rb_raise_jump(rb_make_exception(argc, argv)); + return Qnil; /* not reached */ +} + /* + * Builds the object that raise/fail will throw from the call arguments. + * - no argument: returns Qnil; + * - a single String: a new RuntimeError with that string; + * - a single nil: left as nil, so it fails the Exception check below; + * - otherwise argv[0] must respond to exception; that method is called + * without arguments (one argument given) or with argv[1] (two or + * three given), and the result must be an Exception; + * - a third argument is stored as the backtrace; + * - more than three arguments raise ArgumentError. + * NOTE(review): in the one-argument case n is 0, yet argv[1] is still + * read when the rb_funcall arguments are built -- confirm that slot is + * always readable. + */ +static VALUE +rb_make_exception(argc, argv) + int argc; + VALUE *argv; +{ + VALUE mesg; + ID exception; + int n; + + mesg = Qnil; + switch (argc) { + case 0: + mesg = Qnil; + break; + case 1: + if (NIL_P(argv[0])) break; + if (TYPE(argv[0]) == T_STRING) { + mesg = rb_exc_new3(rb_eRuntimeError, argv[0]); + break; + } + n = 0; /* call exception without arguments */ + goto exception_call; + + case 2: + case 3: + n = 1; /* pass argv[1] along */ + exception_call: + exception = rb_intern("exception"); + if (!rb_respond_to(argv[0], exception)) { + rb_raise(rb_eTypeError, "exception class/object expected"); + } + mesg = rb_funcall(argv[0], exception, n, argv[1]); + break; + default: + rb_raise(rb_eArgError, "wrong number of arguments"); + break; + } + if (argc > 0) { + if (!rb_obj_is_kind_of(mesg, rb_eException)) + rb_raise(rb_eTypeError, "exception object expected"); + if (argc>2) + 
set_backtrace(mesg, argv[2]); + } + + return mesg; +} + /* + * Raises _mesg_ with TAG_RAISE. Unless ruby_frame is already + * top_frame, a fake frame is pushed first and filled with a copy of + * _frame.prev->prev. + */ +static void +rb_raise_jump(mesg) + VALUE mesg; +{ + if (ruby_frame != top_frame) { + PUSH_FRAME(); /* fake frame */ + *ruby_frame = *_frame.prev->prev; + rb_longjmp(TAG_RAISE, mesg); + POP_FRAME(); /* NOTE(review): follows a call declared NORETURN, so this looks unreachable */ + } + rb_longjmp(TAG_RAISE, mesg); +} + /* Function wrapper around JUMP_TAG. */ +void +rb_jump_tag(tag) + int tag; +{ + JUMP_TAG(tag); +} + /* Qtrue when the current frame has iter == ITER_CUR and ruby_block is set, Qfalse otherwise. */ +int +rb_block_given_p() +{ + if (ruby_frame->iter == ITER_CUR && ruby_block) + return Qtrue; + return Qfalse; +} + /* Same answer as rb_block_given_p(). */ +int +rb_iterator_p() +{ + return rb_block_given_p(); +} + +/* + * call-seq: + * block_given? => true or false + * iterator? => true or false + * + * Returns <code>true</code> if <code>yield</code> would execute a + * block in the current context. The <code>iterator?</code> form + * is mildly deprecated. + * + * def try + * if block_given? + * yield + * else + * "no block" + * end + * end + * try #=> "no block" + * try { "hello" } #=> "hello" + * try do "hello" end #=> "hello" + */ + + +static VALUE +rb_f_block_given_p() +{ /* inspects ruby_frame->prev, one frame further out than rb_block_given_p() */ + if (ruby_frame->prev && ruby_frame->prev->iter == ITER_CUR && ruby_block) + return Qtrue; + return Qfalse; +} + +static VALUE rb_eThreadError; + /* + * Reports a jump of kind _state_ that cannot be completed: picks the + * statement name for the tag (break, return, retry, else local-jump), + * formats it followed by the words from proc-closure, and passes the + * text, _result_ and _state_ to localjump_error. + */ +NORETURN(static void proc_jump_error(int, VALUE)); +static void +proc_jump_error(state, result) + int state; + VALUE result; +{ + char mesg[32]; + char *statement; + + switch (state) { + case TAG_BREAK: + statement = "break"; break; + case TAG_RETURN: + statement = "return"; break; + case TAG_RETRY: + statement = "retry"; break; + default: + statement = "local-jump"; break; /* should not happen */ + } + snprintf(mesg, sizeof mesg, "%s from proc-closure", statement); + localjump_error(mesg, result, state); +} + /* + * Performs a return with _retval_ (Qundef is turned into Qnil). + * Walks the prot_tag chain for the tag that should receive the value: + * a PROT_FUNC tag whose frame has the same uniq as ruby_frame, or a + * PROT_LAMBDA tag when no PROT_YIELD tag was passed on the way. The + * value is stored on that tag and TAG_RETURN is thrown. Meeting a + * PROT_THREAD tag raises ThreadError; running out of tags ends in + * localjump_error. + * NOTE(review): after a PROT_YIELD tag, tt is moved to tt->prev and + * dereferenced without a NULL check -- confirm a PROT_YIELD tag is + * never the last one in the chain. + */ +NORETURN(static void return_jump(VALUE)); +static void +return_jump(retval) + VALUE retval; +{ + struct tag *tt = prot_tag; + int yield = Qfalse; + + if (retval == Qundef) retval = Qnil; + while (tt) { + if (tt->tag == PROT_YIELD) { + yield = Qtrue; + tt = tt->prev; + } + if ((tt->tag == PROT_FUNC && tt->frame->uniq == 
ruby_frame->uniq) || + (tt->tag == PROT_LAMBDA && !yield)) + { + tt->dst = (VALUE)tt->frame->uniq; + tt->retval = retval; + JUMP_TAG(TAG_RETURN); + } + if (tt->tag == PROT_THREAD) { + rb_raise(rb_eThreadError, "return can't jump across threads"); + } + tt = tt->prev; + } + localjump_error("unexpected return", retval, TAG_RETURN); +} + /* + * Performs a break with _retval_ (Qundef is turned into Qnil). + * The first tag on the prot_tag chain whose kind is PROT_THREAD, + * PROT_YIELD, PROT_LOOP or PROT_LAMBDA receives the value and its + * frame uniq as destination, then TAG_BREAK is thrown. If the chain + * holds no such tag, localjump_error is called. + */ +static void +break_jump(retval) + VALUE retval; +{ + struct tag *tt = prot_tag; + + if (retval == Qundef) retval = Qnil; + while (tt) { + switch (tt->tag) { + case PROT_THREAD: + case PROT_YIELD: + case PROT_LOOP: + case PROT_LAMBDA: + tt->dst = (VALUE)tt->frame->uniq; + tt->retval = retval; + JUMP_TAG(TAG_BREAK); + break; + default: + break; + } + tt = tt->prev; + } + localjump_error("unexpected break", retval, TAG_BREAK); +} + +static VALUE bmcall _((VALUE, VALUE)); +static int method_arity _((VALUE)); + +static VALUE +rb_yield_0(val, self, klass, flags, avalue) + VALUE val, self, klass; /* OK */ + int flags, avalue; +{ + NODE *node, *var; + volatile VALUE result = Qnil; + volatile VALUE old_cref; + volatile VALUE old_wrapper; + struct BLOCK * volatile block; + struct SCOPE * volatile old_scope; + int old_vmode; + struct FRAME frame; + NODE *cnode = ruby_current_node; + int lambda = flags & YIELD_LAMBDA_CALL; + int state; + + if (!rb_block_given_p()) { + localjump_error("no block given", Qnil, 0); + } + + PUSH_VARS(); + block = ruby_block; + frame = block->frame; + frame.prev = ruby_frame; + ruby_frame = &(frame); + old_cref = (VALUE)ruby_cref; + ruby_cref = block->cref; + old_wrapper = ruby_wrapper; + ruby_wrapper = block->wrapper; + old_scope = ruby_scope; + ruby_scope = block->scope; + old_vmode = scope_vmode; + scope_vmode = (flags & YIELD_PUBLIC_DEF) ? 
SCOPE_PUBLIC : block->vmode; + ruby_block = block->prev; + if (block->flags & BLOCK_D_SCOPE) { + /* put place holder for dynamic (in-block) local variables */ + ruby_dyna_vars = new_dvar(0, 0, block->dyna_vars); + } + else { + /* FOR does not introduce new scope */ + ruby_dyna_vars = block->dyna_vars; + } + PUSH_CLASS(klass ? klass : block->klass); + if (!klass) { + self = block->self; + } + node = block->body; + var = block->var; + + if (var) { + PUSH_TAG(PROT_NONE); + if ((state = EXEC_TAG()) == 0) { + NODE *bvar = NULL; + block_var: + if (var == (NODE*)1) { /* no parameter || */ + if (lambda && RARRAY(val)->len != 0) { + rb_raise(rb_eArgError, "wrong number of arguments (%ld for 0)", + RARRAY(val)->len); + } + } + else if (var == (NODE*)2) { + if (TYPE(val) == T_ARRAY && RARRAY(val)->len != 0) { + rb_raise(rb_eArgError, "wrong number of arguments (%ld for 0)", + RARRAY(val)->len); + } + } + else if (!bvar && nd_type(var) == NODE_BLOCK_PASS) { + bvar = var->nd_body; + var = var->nd_args; + goto block_var; + } + else if (nd_type(var) == NODE_MASGN) { + if (!avalue) { + val = svalue_to_mrhs(val, var->nd_head); + } + massign(self, var, val, lambda); + } + else { + int len = 0; + if (avalue) { + len = RARRAY(val)->len; + if (len == 0) { + goto zero_arg; + } + if (len == 1) { + val = RARRAY(val)->ptr[0]; + } + else { + goto multi_values; + } + } + else if (val == Qundef) { + zero_arg: + val = Qnil; + multi_values: + { + ruby_current_node = var; + rb_warn("multiple values for a block parameter (%d for 1)\n\tfrom %s:%d", + len, cnode->nd_file, nd_line(cnode)); + ruby_current_node = cnode; + } + } + assign(self, var, val, lambda); + } + if (bvar) { + VALUE blk; + if (flags & YIELD_PROC_CALL) + blk = block->block_obj; + else + blk = rb_block_proc(); + assign(self, bvar, blk, 0); + } + } + POP_TAG(); + if (state) goto pop_state; + } + else if (lambda && RARRAY(val)->len != 0 && + (!node || nd_type(node) != NODE_IFUNC || + node->nd_cfnc != bmcall)) { + 
rb_raise(rb_eArgError, "wrong number of arguments (%ld for 0)", + RARRAY(val)->len); + } + if (!node) { + state = 0; + goto pop_state; + } + ruby_current_node = node; + + PUSH_ITER(block->iter); + PUSH_TAG(lambda ? PROT_NONE : PROT_YIELD); + if ((state = EXEC_TAG()) == 0) { + redo: + if (nd_type(node) == NODE_CFUNC || nd_type(node) == NODE_IFUNC) { + if (node->nd_state == YIELD_FUNC_AVALUE) { + if (!avalue) { + val = svalue_to_avalue(val); + } + } + else { + if (avalue) { + val = avalue_to_svalue(val); + } + if (val == Qundef && node->nd_state != YIELD_FUNC_SVALUE) + val = Qnil; + } + if ((block->flags&BLOCK_FROM_METHOD) && RTEST(block->block_obj)) { + struct BLOCK *data, _block; + Data_Get_Struct(block->block_obj, struct BLOCK, data); + _block = *data; + _block.outer = ruby_block; + _block.uniq = block_unique++; + ruby_block = &_block; + PUSH_ITER(ITER_PRE); + ruby_frame->iter = ITER_CUR; + result = (*node->nd_cfnc)(val, node->nd_tval, self); + POP_ITER(); + } + else { + result = (*node->nd_cfnc)(val, node->nd_tval, self); + } + } + else { + result = rb_eval(self, node); + } + } + else { + switch (state) { + case TAG_REDO: + state = 0; + CHECK_INTS; + goto redo; + case TAG_NEXT: + state = 0; + result = prot_tag->retval; + break; + case TAG_BREAK: + if (TAG_DST()) { + result = prot_tag->retval; + } + else { + lambda = Qtrue; /* just pass TAG_BREAK */ + } + break; + default: + break; + } + } + POP_TAG(); + POP_ITER(); + pop_state: + POP_CLASS(); + if (ruby_dyna_vars && (block->flags & BLOCK_D_SCOPE) && + !FL_TEST(ruby_dyna_vars, DVAR_DONT_RECYCLE)) { + struct RVarmap *vars = ruby_dyna_vars; + + if (ruby_dyna_vars->id == 0) { + vars = ruby_dyna_vars->next; + rb_gc_force_recycle((VALUE)ruby_dyna_vars); + while (vars && vars->id != 0 && vars != block->dyna_vars) { + struct RVarmap *tmp = vars->next; + rb_gc_force_recycle((VALUE)vars); + vars = tmp; + } + } + } + POP_VARS(); + ruby_block = block; + ruby_frame = ruby_frame->prev; + ruby_cref = (NODE*)old_cref; + 
ruby_wrapper = old_wrapper; + if (ruby_scope->flags & SCOPE_DONT_RECYCLE) + scope_dup(old_scope); + ruby_scope = old_scope; + scope_vmode = old_vmode; + switch (state) { + case 0: + break; + case TAG_BREAK: + if (!lambda) { + struct tag *tt = prot_tag; + + while (tt) { + if (tt->tag == PROT_LOOP && tt->blkid == ruby_block->uniq) { + tt->dst = (VALUE)tt->frame->uniq; + tt->retval = result; + JUMP_TAG(TAG_BREAK); + } + tt = tt->prev; + } + proc_jump_error(TAG_BREAK, result); + } + /* fall through */ + default: + JUMP_TAG(state); + break; + } + ruby_current_node = cnode; + return result; +} + +VALUE +rb_yield(val) + VALUE val; +{ + return rb_yield_0(val, 0, 0, 0, Qfalse); +} + +VALUE +#ifdef HAVE_STDARG_PROTOTYPES +rb_yield_values(int n, ...) +#else +rb_yield_values(n, va_alist) + int n; + va_dcl +#endif +{ + int i; + va_list args; + VALUE val; + + if (n == 0) { + return rb_yield_0(Qundef, 0, 0, 0, Qfalse); + } + val = rb_values_new2(n, 0); + va_init_list(args, n); + for (i=0; i<n; i++) { + RARRAY(val)->ptr[i] = va_arg(args, VALUE); + } + RARRAY(val)->len = n; + va_end(args); + return rb_yield_0(val, 0, 0, 0, Qtrue); +} + +VALUE +rb_yield_splat(values) + VALUE values; +{ + int avalue = Qfalse; + + if (TYPE(values) == T_ARRAY) { + if (RARRAY(values)->len == 0) { + values = Qundef; + } + else { + avalue = Qtrue; + } + } + return rb_yield_0(values, 0, 0, 0, avalue); +} + +/* + * call-seq: + * loop {|| block } + * + * Repeatedly executes the block. + * + * loop do + * print "Input: " + * line = gets + * break if !line or line =~ /^qQ/ + * # ... 
+ * end + */ + +static VALUE +rb_f_loop() +{ + for (;;) { + rb_yield_0(Qundef, 0, 0, 0, Qfalse); + CHECK_INTS; + } + return Qnil; /* dummy */ +} + +static VALUE +massign(self, node, val, pcall) + VALUE self; + NODE *node; + VALUE val; + int pcall; +{ + NODE *list; + long i = 0, len; + + len = RARRAY(val)->len; + list = node->nd_head; + for (; list && i<len; i++) { + assign(self, list->nd_head, RARRAY(val)->ptr[i], pcall); + list = list->nd_next; + } + if (pcall && list) goto arg_error; + if (node->nd_args) { + if ((long)(node->nd_args) == -1) { + /* no check for mere `*' */ + } + else if (!list && i<len) { + assign(self, node->nd_args, rb_ary_new4(len-i, RARRAY(val)->ptr+i), pcall); + } + else { + assign(self, node->nd_args, rb_ary_new2(0), pcall); + } + } + else if (pcall && i < len) { + goto arg_error; + } + + while (list) { + i++; + assign(self, list->nd_head, Qnil, pcall); + list = list->nd_next; + } + return val; + + arg_error: + while (list) { + i++; + list = list->nd_next; + } + rb_raise(rb_eArgError, "wrong number of arguments (%ld for %ld)", len, i); +} + +static void +assign(self, lhs, val, pcall) + VALUE self; + NODE *lhs; + VALUE val; + int pcall; +{ + ruby_current_node = lhs; + if (val == Qundef) { + rb_warning("assigning void value"); + val = Qnil; + } + switch (nd_type(lhs)) { + case NODE_GASGN: + rb_gvar_set(lhs->nd_entry, val); + break; + + case NODE_IASGN: + rb_ivar_set(self, lhs->nd_vid, val); + break; + + case NODE_LASGN: + if (ruby_scope->local_vars == 0) + rb_bug("unexpected local variable assignment"); + ruby_scope->local_vars[lhs->nd_cnt] = val; + break; + + case NODE_DASGN: + dvar_asgn(lhs->nd_vid, val); + break; + + case NODE_DASGN_CURR: + dvar_asgn_curr(lhs->nd_vid, val); + break; + + case NODE_CDECL: + if (lhs->nd_vid == 0) { + rb_const_set(class_prefix(self, lhs->nd_else), lhs->nd_else->nd_mid, val); + } + else { + rb_const_set(ruby_cbase, lhs->nd_vid, val); + } + break; + + case NODE_CVDECL: + if (RTEST(ruby_verbose) && 
FL_TEST(ruby_cbase, FL_SINGLETON)) { + rb_warn("declaring singleton class variable"); + } + rb_cvar_set(cvar_cbase(), lhs->nd_vid, val, Qtrue); + break; + + case NODE_CVASGN: + rb_cvar_set(cvar_cbase(), lhs->nd_vid, val, Qfalse); + break; + + case NODE_MASGN: + massign(self, lhs, svalue_to_mrhs(val, lhs->nd_head), pcall); + break; + + case NODE_CALL: + case NODE_ATTRASGN: + { + VALUE recv; + int scope; + if (lhs->nd_recv == (NODE *)1) { + recv = self; + scope = 1; + } + else { + recv = rb_eval(self, lhs->nd_recv); + scope = 0; + } + if (!lhs->nd_args) { + /* attr set */ + ruby_current_node = lhs; + SET_CURRENT_SOURCE(); + rb_call(CLASS_OF(recv), recv, lhs->nd_mid, 1, &val, scope); + } + else { + /* array set */ + VALUE args; + + args = rb_eval(self, lhs->nd_args); + rb_ary_push(args, val); + ruby_current_node = lhs; + SET_CURRENT_SOURCE(); + rb_call(CLASS_OF(recv), recv, lhs->nd_mid, + RARRAY(args)->len, RARRAY(args)->ptr, scope); + } + } + break; + + default: + rb_bug("bug in variable assignment"); + break; + } +} + +VALUE +rb_iterate(it_proc, data1, bl_proc, data2) + VALUE (*it_proc) _((VALUE)), (*bl_proc)(ANYARGS); + VALUE data1, data2; +{ + int state; + volatile VALUE retval = Qnil; + NODE *node = NEW_IFUNC(bl_proc, data2); + VALUE self = ruby_top_self; + + PUSH_ITER(ITER_PRE); + PUSH_TAG(PROT_LOOP); + PUSH_BLOCK(0, node); + state = EXEC_TAG(); + if (state == 0) { + iter_retry: + retval = (*it_proc)(data1); + } + else if (state == TAG_BREAK && TAG_DST()) { + retval = prot_tag->retval; + state = 0; + } + else if (state == TAG_RETRY) { + state = 0; + goto iter_retry; + } + POP_BLOCK(); + POP_TAG(); + POP_ITER(); + + switch (state) { + case 0: + break; + default: + JUMP_TAG(state); + } + return retval; +} + +static int +handle_rescue(self, node) + VALUE self; + NODE *node; +{ + int argc; VALUE *argv; /* used in SETUP_ARGS */ + TMP_PROTECT; + + if (!node->nd_args) { + return rb_obj_is_kind_of(ruby_errinfo, rb_eStandardError); + } + + BEGIN_CALLARGS; + 
SETUP_ARGS(node->nd_args); + END_CALLARGS; + + while (argc--) { + if (!rb_obj_is_kind_of(argv[0], rb_cModule)) { + rb_raise(rb_eTypeError, "class or module required for rescue clause"); + } + if (RTEST(rb_funcall(*argv, eqq, 1, ruby_errinfo))) return 1; + argv++; + } + return 0; +} + +VALUE +#ifdef HAVE_STDARG_PROTOTYPES +rb_rescue2(VALUE (*b_proc)(ANYARGS), VALUE data1, VALUE (*r_proc)(ANYARGS), VALUE data2, ...) +#else +rb_rescue2(b_proc, data1, r_proc, data2, va_alist) + VALUE (*b_proc)(ANYARGS), (*r_proc)(ANYARGS); + VALUE data1, data2; + va_dcl +#endif +{ + int state; + volatile VALUE result; + volatile VALUE e_info = ruby_errinfo; + va_list args; + + PUSH_TAG(PROT_NONE); + if ((state = EXEC_TAG()) == 0) { + retry_entry: + result = (*b_proc)(data1); + } + else if (state == TAG_RAISE) { + int handle = Qfalse; + VALUE eclass; + + va_init_list(args, data2); + while (eclass = va_arg(args, VALUE)) { + if (rb_obj_is_kind_of(ruby_errinfo, eclass)) { + handle = Qtrue; + break; + } + } + va_end(args); + + if (handle) { + if (r_proc) { + PUSH_TAG(PROT_NONE); + if ((state = EXEC_TAG()) == 0) { + result = (*r_proc)(data2, ruby_errinfo); + } + POP_TAG(); + if (state == TAG_RETRY) { + state = 0; + ruby_errinfo = Qnil; + goto retry_entry; + } + } + else { + result = Qnil; + state = 0; + } + if (state == 0) { + ruby_errinfo = e_info; + } + } + } + POP_TAG(); + if (state) JUMP_TAG(state); + + return result; +} + +VALUE +rb_rescue(b_proc, data1, r_proc, data2) + VALUE (*b_proc)(), (*r_proc)(); + VALUE data1, data2; +{ + return rb_rescue2(b_proc, data1, r_proc, data2, rb_eStandardError, (VALUE)0); +} + +static VALUE cont_protect; + +VALUE +rb_protect(proc, data, state) + VALUE (*proc) _((VALUE)); + VALUE data; + int *state; +{ + VALUE result = Qnil; /* OK */ + int status; + + PUSH_THREAD_TAG(); + cont_protect = (VALUE)rb_node_newnode(NODE_MEMO, cont_protect, 0, 0); + if ((status = EXEC_TAG()) == 0) { + result = (*proc)(data); + } + else if (status == TAG_THREAD) { + 
rb_thread_start_1(); + } + cont_protect = ((NODE *)cont_protect)->u1.value; + POP_THREAD_TAG(); + if (state) { + *state = status; + } + if (status != 0) { + return Qnil; + } + + return result; +} + +VALUE +rb_ensure(b_proc, data1, e_proc, data2) + VALUE (*b_proc)(); + VALUE data1; + VALUE (*e_proc)(); + VALUE data2; +{ + int state; + volatile VALUE result = Qnil; + VALUE retval; + + PUSH_TAG(PROT_NONE); + if ((state = EXEC_TAG()) == 0) { + result = (*b_proc)(data1); + } + POP_TAG(); + retval = prot_tag ? prot_tag->retval : Qnil; /* save retval */ + (*e_proc)(data2); + if (prot_tag) return_value(retval); + if (state) JUMP_TAG(state); + return result; +} + +VALUE +rb_with_disable_interrupt(proc, data) + VALUE (*proc)(); + VALUE data; +{ + VALUE result = Qnil; /* OK */ + int status; + + DEFER_INTS; + { + int thr_critical = rb_thread_critical; + + rb_thread_critical = Qtrue; + PUSH_TAG(PROT_NONE); + if ((status = EXEC_TAG()) == 0) { + result = (*proc)(data); + } + POP_TAG(); + rb_thread_critical = thr_critical; + } + ENABLE_INTS; + if (status) JUMP_TAG(status); + + return result; +} + +static inline void +stack_check() +{ + static int overflowing = 0; + + if (!overflowing && ruby_stack_check()) { + int state; + overflowing = 1; + PUSH_TAG(PROT_NONE); + if ((state = EXEC_TAG()) == 0) { + rb_exc_raise(sysstack_error); + } + POP_TAG(); + overflowing = 0; + JUMP_TAG(state); + } +} + +static int last_call_status; + +#define CSTAT_PRIV 1 +#define CSTAT_PROT 2 +#define CSTAT_VCALL 4 +#define CSTAT_SUPER 8 + +/* + * call-seq: + * obj.method_missing(symbol [, *args] ) => result + * + * Invoked by Ruby when <i>obj</i> is sent a message it cannot handle. + * <i>symbol</i> is the symbol for the method called, and <i>args</i> + * are any arguments that were passed to it. By default, the interpreter + * raises an error when this method is called. However, it is possible + * to override the method to provide more dynamic behavior. 
+ * The example below creates + * a class <code>Roman</code>, which responds to methods with names + * consisting of roman numerals, returning the corresponding integer + * values. + * + * class Roman + * def romanToInt(str) + * # ... + * end + * def method_missing(methId) + * str = methId.id2name + * romanToInt(str) + * end + * end + * + * r = Roman.new + * r.iv #=> 4 + * r.xxiii #=> 23 + * r.mm #=> 2000 + */ + +static VALUE +rb_method_missing(argc, argv, obj) + int argc; + VALUE *argv; + VALUE obj; +{ + ID id; + VALUE exc = rb_eNoMethodError; + char *format = 0; + NODE *cnode = ruby_current_node; + + if (argc == 0 || !SYMBOL_P(argv[0])) { + rb_raise(rb_eArgError, "no id given"); + } + + stack_check(); + + id = SYM2ID(argv[0]); + + if (last_call_status & CSTAT_PRIV) { + format = "private method `%s' called for %s"; + } + else if (last_call_status & CSTAT_PROT) { + format = "protected method `%s' called for %s"; + } + else if (last_call_status & CSTAT_VCALL) { + format = "undefined local variable or method `%s' for %s"; + exc = rb_eNameError; + } + else if (last_call_status & CSTAT_SUPER) { + format = "super: no superclass method `%s'"; + } + if (!format) { + format = "undefined method `%s' for %s"; + } + + ruby_current_node = cnode; + { + int n = 0; + VALUE args[3]; + + args[n++] = rb_funcall(rb_const_get(exc, rb_intern("message")), '!', + 3, rb_str_new2(format), obj, argv[0]); + args[n++] = argv[0]; + if (exc == rb_eNoMethodError) { + args[n++] = rb_ary_new4(argc-1, argv+1); + } + exc = rb_class_new_instance(n, args, exc); + ruby_frame = ruby_frame->prev; /* pop frame for "method_missing" */ + rb_exc_raise(exc); + } + + return Qnil; /* not reached */ +} + +static VALUE +method_missing(obj, id, argc, argv, call_status) + VALUE obj; + ID id; + int argc; + const VALUE *argv; + int call_status; +{ + VALUE *nargv; + + last_call_status = call_status; + + if (id == missing) { + PUSH_FRAME(); + rb_method_missing(argc, argv, obj); + POP_FRAME(); + } + else if (id == 
ID_ALLOCATOR) { + rb_raise(rb_eTypeError, "allocator undefined for %s", rb_class2name(obj)); + } + + nargv = ALLOCA_N(VALUE, argc+1); + nargv[0] = ID2SYM(id); + MEMCPY(nargv+1, argv, VALUE, argc); + + return rb_funcall2(obj, missing, argc+1, nargv); +} + +static inline VALUE +call_cfunc(func, recv, len, argc, argv) + VALUE (*func)(); + VALUE recv; + int len, argc; + VALUE *argv; +{ + if (len >= 0 && argc != len) { + rb_raise(rb_eArgError, "wrong number of arguments (%d for %d)", + argc, len); + } + + switch (len) { + case -2: + return (*func)(recv, rb_ary_new4(argc, argv)); + break; + case -1: + return (*func)(argc, argv, recv); + break; + case 0: + return (*func)(recv); + break; + case 1: + return (*func)(recv, argv[0]); + break; + case 2: + return (*func)(recv, argv[0], argv[1]); + break; + case 3: + return (*func)(recv, argv[0], argv[1], argv[2]); + break; + case 4: + return (*func)(recv, argv[0], argv[1], argv[2], argv[3]); + break; + case 5: + return (*func)(recv, argv[0], argv[1], argv[2], argv[3], argv[4]); + break; + case 6: + return (*func)(recv, argv[0], argv[1], argv[2], argv[3], argv[4], + argv[5]); + break; + case 7: + return (*func)(recv, argv[0], argv[1], argv[2], argv[3], argv[4], + argv[5], argv[6]); + break; + case 8: + return (*func)(recv, argv[0], argv[1], argv[2], argv[3], argv[4], + argv[5], argv[6], argv[7]); + break; + case 9: + return (*func)(recv, argv[0], argv[1], argv[2], argv[3], argv[4], + argv[5], argv[6], argv[7], argv[8]); + break; + case 10: + return (*func)(recv, argv[0], argv[1], argv[2], argv[3], argv[4], + argv[5], argv[6], argv[7], argv[8], argv[9]); + break; + case 11: + return (*func)(recv, argv[0], argv[1], argv[2], argv[3], argv[4], + argv[5], argv[6], argv[7], argv[8], argv[9], argv[10]); + break; + case 12: + return (*func)(recv, argv[0], argv[1], argv[2], argv[3], argv[4], + argv[5], argv[6], argv[7], argv[8], argv[9], + argv[10], argv[11]); + break; + case 13: + return (*func)(recv, argv[0], argv[1], argv[2], argv[3], 
argv[4], + argv[5], argv[6], argv[7], argv[8], argv[9], argv[10], + argv[11], argv[12]); + break; + case 14: + return (*func)(recv, argv[0], argv[1], argv[2], argv[3], argv[4], + argv[5], argv[6], argv[7], argv[8], argv[9], argv[10], + argv[11], argv[12], argv[13]); + break; + case 15: + return (*func)(recv, argv[0], argv[1], argv[2], argv[3], argv[4], + argv[5], argv[6], argv[7], argv[8], argv[9], argv[10], + argv[11], argv[12], argv[13], argv[14]); + break; + default: + rb_raise(rb_eArgError, "too many arguments (%d)", len); + break; + } + return Qnil; /* not reached */ +} + +static VALUE +rb_call0(klass, recv, id, oid, argc, argv, body, nosuper) + VALUE klass, recv; + ID id; + ID oid; + int argc; /* OK */ + VALUE *argv; /* OK */ + NODE *body; /* OK */ + int nosuper; +{ + NODE *b2; /* OK */ + volatile VALUE result = Qnil; + int itr; + static int tick; + volatile VALUE args; + TMP_PROTECT; + + switch (ruby_iter->iter) { + case ITER_PRE: + itr = ITER_CUR; + break; + case ITER_CUR: + default: + itr = ITER_NOT; + break; + } + + if ((++tick & 0xff) == 0) { + CHECK_INTS; /* better than nothing */ + stack_check(); + rb_gc_finalize_deferred(); + } + if (argc < 0) { + argc = -argc-1; + args = rb_ary_concat(rb_ary_new4(argc, argv), splat_value(argv[argc])); + argc = RARRAY(args)->len; + argv = RARRAY(args)->ptr; + } + PUSH_ITER(itr); + PUSH_FRAME(); + ruby_frame->callee = id; + ruby_frame->this_func = oid; + ruby_frame->this_class = nosuper?0:klass; + ruby_frame->self = recv; + ruby_frame->argc = argc; + + switch (nd_type(body)) { + case NODE_CFUNC: + { + int len = body->nd_argc; + + if (len < -2) { + rb_bug("bad argc (%d) specified for `%s(%s)'", + len, rb_class2name(klass), rb_id2name(id)); + } + if (event_hooks) { + int state; + + EXEC_EVENT_HOOK(RUBY_EVENT_C_CALL, ruby_current_node, + recv, id, klass); + PUSH_TAG(PROT_FUNC); + if ((state = EXEC_TAG()) == 0) { + result = call_cfunc(body->nd_cfnc, recv, len, argc, argv); + } + POP_TAG(); + ruby_current_node = 
ruby_frame->node; + EXEC_EVENT_HOOK(RUBY_EVENT_C_RETURN, ruby_current_node, + recv, id, klass); + if (state) JUMP_TAG(state); + } + else { + result = call_cfunc(body->nd_cfnc, recv, len, argc, argv); + } + } + break; + + /* for attr get/set */ + case NODE_IVAR: + if (argc != 0) { + rb_raise(rb_eArgError, "wrong number of arguments (%d for 0)", argc); + } + result = rb_attr_get(recv, body->nd_vid); + break; + + case NODE_ATTRSET: + if (argc != 1) + rb_raise(rb_eArgError, "wrong number of arguments (%d for 1)", argc); + result = rb_ivar_set(recv, body->nd_vid, argv[0]); + break; + + case NODE_ZSUPER: /* visibility override */ + result = rb_call_super(argc, argv); + break; + + case NODE_BMETHOD: + ruby_frame->flags |= FRAME_DMETH; + result = proc_invoke(body->nd_cval, rb_ary_new4(argc, argv), recv, klass); + break; + + case NODE_SCOPE: + { + int state; + VALUE *local_vars; /* OK */ + NODE *saved_cref = 0; + + PUSH_SCOPE(); + + if (body->nd_rval) { + saved_cref = ruby_cref; + ruby_cref = (NODE*)body->nd_rval; + } + PUSH_CLASS(ruby_cbase); + if (body->nd_tbl) { + local_vars = TMP_ALLOC(body->nd_tbl[0]+1); + *local_vars++ = (VALUE)body; + rb_mem_clear(local_vars, body->nd_tbl[0]); + ruby_scope->local_tbl = body->nd_tbl; + ruby_scope->local_vars = local_vars; + } + else { + local_vars = ruby_scope->local_vars = 0; + ruby_scope->local_tbl = 0; + } + b2 = body = body->nd_next; + + PUSH_VARS(); + PUSH_TAG(PROT_FUNC); + + if ((state = EXEC_TAG()) == 0) { + NODE *node = 0; + int i; + + if (nd_type(body) == NODE_ARGS) { + node = body; + body = 0; + } + else if (nd_type(body) == NODE_BLOCK) { + node = body->nd_head; + body = body->nd_next; + } + if (node) { + if (nd_type(node) != NODE_ARGS) { + rb_bug("no argument-node"); + } + + i = node->nd_cnt; + if (i > argc) { + rb_raise(rb_eArgError, "wrong number of arguments (%d for %d)", argc, i); + } + if ((long)node->nd_rest == -1) { + int opt = i; + NODE *optnode = node->nd_opt; + + while (optnode) { + opt++; + optnode = 
optnode->nd_next; + } + if (opt < argc) { + rb_raise(rb_eArgError, "wrong number of arguments (%d for %d)", + argc, opt); + } + ruby_frame->argc = opt; + } + + if (local_vars) { + if (i > 0) { + /* +2 for $_ and $~ */ + MEMCPY(local_vars+2, argv, VALUE, i); + } + argv += i; argc -= i; + if (node->nd_opt) { + NODE *opt = node->nd_opt; + + while (opt && argc) { + assign(recv, opt->nd_head, *argv, 1); + argv++; argc--; + opt = opt->nd_next; + } + if (opt) { + rb_eval(recv, opt); + } + } + if ((long)node->nd_rest >= 0) { + VALUE v; + + if (argc > 0) + v = rb_ary_new4(argc,argv); + else + v = rb_ary_new2(0); + ruby_scope->local_vars[node->nd_rest] = v; + } + } + } + if ((long)node->nd_rest >= 0) { + ruby_frame->argc = -(ruby_frame->argc - argc)-1; + } + + if (event_hooks) { + EXEC_EVENT_HOOK(RUBY_EVENT_CALL, b2, recv, id, klass); + } + result = rb_eval(recv, body); + } + else if (state == TAG_RETURN && TAG_DST()) { + result = prot_tag->retval; + state = 0; + } + POP_TAG(); + POP_VARS(); + POP_CLASS(); + POP_SCOPE(); + ruby_cref = saved_cref; + if (event_hooks) { + EXEC_EVENT_HOOK(RUBY_EVENT_RETURN, body, recv, id, klass); + } + switch (state) { + case 0: + break; + + case TAG_BREAK: + case TAG_RETURN: + JUMP_TAG(state); + break; + + case TAG_RETRY: + if (rb_block_given_p()) JUMP_TAG(state); + /* fall through */ + default: + jump_tag_but_local_jump(state, result); + break; + } + } + break; + + default: + rb_bug("unknown node type %d", nd_type(body)); + break; + } + POP_FRAME(); + POP_ITER(); + return result; +} + +static VALUE +rb_call(klass, recv, mid, argc, argv, scope) + VALUE klass, recv; + ID mid; + int argc; /* OK */ + const VALUE *argv; /* OK */ + int scope; +{ + NODE *body; /* OK */ + int noex; + ID id = mid; + struct cache_entry *ent; + + if (!klass) { + rb_raise(rb_eNotImpError, "method `%s' called on terminated object (0x%lx)", + rb_id2name(mid), recv); + } + /* is it in the method cache? 
*/ + ent = cache + EXPR1(klass, mid); + if (ent->mid == mid && ent->klass == klass) { + if (!ent->method) + return method_missing(recv, mid, argc, argv, scope==2?CSTAT_VCALL:0); + klass = ent->origin; + id = ent->mid0; + noex = ent->noex; + body = ent->method; + } + else if ((body = rb_get_method_body(&klass, &id, &noex)) == 0) { + if (scope == 3) { + return method_missing(recv, mid, argc, argv, CSTAT_SUPER); + } + return method_missing(recv, mid, argc, argv, scope==2?CSTAT_VCALL:0); + } + + if (mid != missing) { + /* receiver specified form for private method */ + if ((noex & NOEX_PRIVATE) && scope == 0) + return method_missing(recv, mid, argc, argv, CSTAT_PRIV); + + /* self must be kind of a specified form for protected method */ + if ((noex & NOEX_PROTECTED)) { + VALUE defined_class = klass; + + if (TYPE(defined_class) == T_ICLASS) { + defined_class = RBASIC(defined_class)->klass; + } + if (!rb_obj_is_kind_of(ruby_frame->self, rb_class_real(defined_class))) + return method_missing(recv, mid, argc, argv, CSTAT_PROT); + } + } + + return rb_call0(klass, recv, mid, id, argc, argv, body, noex & NOEX_NOSUPER); +} + +VALUE +rb_apply(recv, mid, args) + VALUE recv; + ID mid; + VALUE args; +{ + int argc; + VALUE *argv; + + argc = RARRAY(args)->len; /* Assigns LONG, but argc is INT */ + argv = ALLOCA_N(VALUE, argc); + MEMCPY(argv, RARRAY(args)->ptr, VALUE, argc); + return rb_call(CLASS_OF(recv), recv, mid, argc, argv, 1); +} + +/* + * call-seq: + * obj.send(symbol [, args...]) => obj + * obj.__send__(symbol [, args...]) => obj + * + * Invokes the method identified by _symbol_, passing it any + * arguments specified. You can use <code>__send__</code> if the name + * +send+ clashes with an existing method in _obj_. 
+ * + * class Klass + * def hello(*args) + * "Hello " + args.join(' ') + * end + * end + * k = Klass.new + * k.send :hello, "gentle", "readers" #=> "Hello gentle readers" + */ + +static VALUE +rb_f_send(argc, argv, recv) + int argc; + VALUE *argv; + VALUE recv; +{ + VALUE vid; + + if (argc == 0) rb_raise(rb_eArgError, "no method name given"); + + vid = *argv++; argc--; + PUSH_ITER(rb_block_given_p()?ITER_PRE:ITER_NOT); + vid = rb_call(CLASS_OF(recv), recv, rb_to_id(vid), argc, argv, 1); + POP_ITER(); + + return vid; +} + +VALUE +#ifdef HAVE_STDARG_PROTOTYPES +rb_funcall(VALUE recv, ID mid, int n, ...) +#else +rb_funcall(recv, mid, n, va_alist) + VALUE recv; + ID mid; + int n; + va_dcl +#endif +{ + VALUE *argv; + va_list ar; + va_init_list(ar, n); + + if (n > 0) { + long i; + + argv = ALLOCA_N(VALUE, n); + + for (i=0;i<n;i++) { + argv[i] = va_arg(ar, VALUE); + } + va_end(ar); + } + else { + argv = 0; + } + + return rb_call(CLASS_OF(recv), recv, mid, n, argv, 1); +} + +VALUE +rb_funcall2(recv, mid, argc, argv) + VALUE recv; + ID mid; + int argc; + const VALUE *argv; +{ + return rb_call(CLASS_OF(recv), recv, mid, argc, argv, 1); +} + +VALUE +rb_funcall3(recv, mid, argc, argv) + VALUE recv; + ID mid; + int argc; + const VALUE *argv; +{ + return rb_call(CLASS_OF(recv), recv, mid, argc, argv, 0); +} + +VALUE +rb_call_super(argc, argv) + int argc; + const VALUE *argv; +{ + VALUE result, self, klass, k; + + if (ruby_frame->this_class == 0) { + rb_name_error(ruby_frame->callee, "calling `super' from `%s' is prohibited", + rb_id2name(ruby_frame->this_func)); + } + + self = ruby_frame->self; + klass = ruby_frame->this_class; + + PUSH_ITER(ruby_iter->iter ? 
ITER_PRE : ITER_NOT); + result = rb_call(RCLASS(klass)->super, self, ruby_frame->this_func, argc, argv, 3); + POP_ITER(); + + return result; +} + +static VALUE +backtrace(lev) + int lev; +{ + struct FRAME *frame = ruby_frame; + char buf[BUFSIZ]; + volatile VALUE ary; + NODE *n; + + ary = rb_ary_new(); + if (frame->this_func == ID_ALLOCATOR) { + frame = frame->prev; + } + if (lev < 0) { + ruby_set_current_source(); + if (frame->this_func) { + snprintf(buf, BUFSIZ, "%s:%d:in `%s'", + ruby_sourcefile, ruby_sourceline, + rb_id2name(frame->this_func)); + } + else if (ruby_sourceline == 0) { + snprintf(buf, BUFSIZ, "%s", ruby_sourcefile); + } + else { + snprintf(buf, BUFSIZ, "%s:%d", ruby_sourcefile, ruby_sourceline); + } + rb_ary_push(ary, rb_str_new2(buf)); + if (lev < -1) return ary; + } + else { + while (lev-- > 0) { + frame = frame->prev; + if (!frame) { + ary = Qnil; + break; + } + } + } + while (frame && (n = frame->node)) { + if (frame->prev && frame->prev->this_func) { + snprintf(buf, BUFSIZ, "%s:%d:in `%s'", + n->nd_file, nd_line(n), + rb_id2name(frame->prev->this_func)); + } + else { + snprintf(buf, BUFSIZ, "%s:%d", n->nd_file, nd_line(n)); + } + rb_ary_push(ary, rb_str_new2(buf)); + frame = frame->prev; + } + + return ary; +} + +/* + * call-seq: + * caller(start=1) => array + * + * Returns the current execution stack---an array containing strings in + * the form ``<em>file:line</em>'' or ``<em>file:line: in + * `method'</em>''. The optional _start_ parameter + * determines the number of initial stack entries to omit from the + * result. 
+ * + * def a(skip) + * caller(skip) + * end + * def b(skip) + * a(skip) + * end + * def c(skip) + * b(skip) + * end + * c(0) #=> ["prog:2:in `a'", "prog:5:in `b'", "prog:8:in `c'", "prog:10"] + * c(1) #=> ["prog:5:in `b'", "prog:8:in `c'", "prog:11"] + * c(2) #=> ["prog:8:in `c'", "prog:12"] + * c(3) #=> ["prog:13"] + */ + +static VALUE +rb_f_caller(argc, argv) + int argc; + VALUE *argv; +{ + VALUE level; + int lev; + + rb_scan_args(argc, argv, "01", &level); + + if (NIL_P(level)) lev = 1; + else lev = NUM2INT(level); + if (lev < 0) rb_raise(rb_eArgError, "negative level (%d)", lev); + + return backtrace(lev); +} + +void +rb_backtrace() +{ + long i; + VALUE ary; + + ary = backtrace(-1); + for (i=0; i<RARRAY(ary)->len; i++) { + printf("\tfrom %s\n", RSTRING(RARRAY(ary)->ptr[i])->ptr); + } +} + +static VALUE +make_backtrace() +{ + return backtrace(-1); +} + +ID +rb_frame_this_func() +{ + return ruby_frame->this_func; +} + +static NODE* +compile(src, file, line) + VALUE src; + char *file; + int line; +{ + NODE *node; + int critical; + + ruby_nerrs = 0; + StringValue(src); + critical = rb_thread_critical; + rb_thread_critical = Qtrue; + node = rb_compile_string(file, src, line); + rb_thread_critical = critical; + + if (ruby_nerrs == 0) return node; + return 0; +} + +static VALUE +eval(self, src, scope, file, line) + VALUE self, src, scope; + char *file; + int line; +{ + struct BLOCK *data = NULL; + volatile VALUE result = Qnil; + struct SCOPE * volatile old_scope; + struct BLOCK * volatile old_block; + struct RVarmap * volatile old_dyna_vars; + VALUE volatile old_cref; + int volatile old_vmode; + volatile VALUE old_wrapper; + struct FRAME frame; + NODE *nodesave = ruby_current_node; + volatile int iter = ruby_frame->iter; + volatile int safe = ruby_safe_level; + int state; + + if (!NIL_P(scope)) { + if (!rb_obj_is_proc(scope)) { + rb_raise(rb_eTypeError, "wrong argument type %s (expected Proc/Binding)", + rb_obj_classname(scope)); + } + + Data_Get_Struct(scope, struct 
BLOCK, data); + /* PUSH BLOCK from data */ + frame = data->frame; + frame.tmp = ruby_frame; /* gc protection */ + ruby_frame = &(frame); + old_scope = ruby_scope; + ruby_scope = data->scope; + old_block = ruby_block; + ruby_block = data->prev; + old_dyna_vars = ruby_dyna_vars; + ruby_dyna_vars = data->dyna_vars; + old_vmode = scope_vmode; + scope_vmode = data->vmode; + old_cref = (VALUE)ruby_cref; + ruby_cref = data->cref; + old_wrapper = ruby_wrapper; + ruby_wrapper = data->wrapper; + if ((file == 0 || (line == 1 && strcmp(file, "(eval)") == 0)) && data->frame.node) { + file = data->frame.node->nd_file; + if (!file) file = "__builtin__"; + line = nd_line(data->frame.node); + } + + self = data->self; + ruby_frame->iter = data->iter; + } + else { + if (ruby_frame->prev) { + ruby_frame->iter = ruby_frame->prev->iter; + } + } + if (file == 0) { + ruby_set_current_source(); + file = ruby_sourcefile; + line = ruby_sourceline; + } + PUSH_CLASS(ruby_cbase); + ruby_in_eval++; + if (TYPE(ruby_class) == T_ICLASS) { + ruby_class = RBASIC(ruby_class)->klass; + } + PUSH_TAG(PROT_NONE); + if ((state = EXEC_TAG()) == 0) { + NODE *node; + + ruby_safe_level = 0; + result = ruby_errinfo; + ruby_errinfo = Qnil; + node = compile(src, file, line); + ruby_safe_level = safe; + if (ruby_nerrs > 0) { + compile_error(0); + } + if (!NIL_P(result)) ruby_errinfo = result; + result = eval_node(self, node); + } + POP_TAG(); + POP_CLASS(); + ruby_in_eval--; + ruby_safe_level = safe; + if (!NIL_P(scope)) { + int dont_recycle = ruby_scope->flags & SCOPE_DONT_RECYCLE; + + ruby_wrapper = old_wrapper; + ruby_cref = (NODE*)old_cref; + ruby_frame = frame.tmp; + ruby_scope = old_scope; + ruby_block = old_block; + ruby_dyna_vars = old_dyna_vars; + data->vmode = scope_vmode; /* write back visibility mode */ + scope_vmode = old_vmode; + if (dont_recycle) { + struct tag *tag; + struct RVarmap *vars; + + scope_dup(ruby_scope); + for (tag=prot_tag; tag; tag=tag->prev) { + scope_dup(tag->scope); + } + for (vars 
= ruby_dyna_vars; vars; vars = vars->next) { + FL_SET(vars, DVAR_DONT_RECYCLE); + } + } + } + else { + ruby_frame->iter = iter; + } + ruby_current_node = nodesave; + ruby_set_current_source(); + if (state) { + if (state == TAG_RAISE) { + if (strcmp(file, "(eval)") == 0) { + VALUE mesg, errat; + + errat = get_backtrace(ruby_errinfo); + mesg = rb_attr_get(ruby_errinfo, rb_intern("mesg")); + if (!NIL_P(errat) && TYPE(errat) == T_ARRAY) { + if (!NIL_P(mesg) && TYPE(mesg) == T_STRING) { + rb_str_update(mesg, 0, 0, rb_str_new2(": ")); + rb_str_update(mesg, 0, 0, RARRAY(errat)->ptr[0]); + } + RARRAY(errat)->ptr[0] = RARRAY(backtrace(-2))->ptr[0]; + } + } + rb_exc_raise(ruby_errinfo); + } + JUMP_TAG(state); + } + + return result; +} + +/* + * call-seq: + * eval(string [, binding [, filename [,lineno]]]) => obj + * + * Evaluates the Ruby expression(s) in <em>string</em>. If + * <em>binding</em> is given, the evaluation is performed in its + * context. The binding may be a <code>Binding</code> object or a + * <code>Proc</code> object. If the optional <em>filename</em> and + * <em>lineno</em> parameters are present, they will be used when + * reporting syntax errors. 
+ * + * def getBinding(str) + * return binding + * end + * str = "hello" + * eval "str + ' Fred'" #=> "hello Fred" + * eval "str + ' Fred'", getBinding("bye") #=> "bye Fred" + */ + +static VALUE +rb_f_eval(argc, argv, self) + int argc; + VALUE *argv; + VALUE self; +{ + VALUE src, scope, vfile, vline; + char *file = "(eval)"; + int line = 1; + + rb_scan_args(argc, argv, "13", &src, &scope, &vfile, &vline); + if (ruby_safe_level >= 4) { + StringValue(src); + if (!NIL_P(scope) && !OBJ_TAINTED(scope)) { + rb_raise(rb_eSecurityError, "Insecure: can't modify trusted binding"); + } + } + else { + SafeStringValue(src); + } + if (argc >= 3) { + StringValue(vfile); + } + if (argc >= 4) { + line = NUM2INT(vline); + } + + if (!NIL_P(vfile)) file = RSTRING(vfile)->ptr; + if (NIL_P(scope) && ruby_frame->prev) { + struct FRAME *prev; + VALUE val; + + prev = ruby_frame; + PUSH_FRAME(); + *ruby_frame = *prev->prev; + ruby_frame->prev = prev; + val = eval(self, src, scope, file, line); + POP_FRAME(); + + return val; + } + return eval(self, src, scope, file, line); +} + +/* function to call func under the specified class/module context */ +static VALUE +exec_under(func, under, cbase, args) + VALUE (*func)(); + VALUE under, cbase; + void *args; +{ + VALUE val = Qnil; /* OK */ + int state; + int mode; + + PUSH_CLASS(under); + PUSH_FRAME(); + ruby_frame->self = _frame.prev->self; + ruby_frame->callee = _frame.prev->callee; + ruby_frame->this_func = _frame.prev->this_func; + ruby_frame->this_class = _frame.prev->this_class; + ruby_frame->argc = _frame.prev->argc; + if (cbase) { + PUSH_CREF(cbase); + } + + mode = scope_vmode; + SCOPE_SET(SCOPE_PUBLIC); + PUSH_TAG(PROT_NONE); + if ((state = EXEC_TAG()) == 0) { + val = (*func)(args); + } + POP_TAG(); + if (cbase) POP_CREF(); + SCOPE_SET(mode); + POP_FRAME(); + POP_CLASS(); + if (state) JUMP_TAG(state); + + return val; +} + +static VALUE +eval_under_i(args) + VALUE *args; +{ + return eval(args[0], args[1], Qnil, (char*)args[2], 
(int)args[3]); +} + +/* string eval under the class/module context */ +static VALUE +eval_under(under, self, src, file, line) + VALUE under, self, src; + const char *file; + int line; +{ + VALUE args[4]; + + if (ruby_safe_level >= 4) { + StringValue(src); + } + else { + SafeStringValue(src); + } + args[0] = self; + args[1] = src; + args[2] = (VALUE)file; + args[3] = (VALUE)line; + return exec_under(eval_under_i, under, under, args); +} + +static VALUE +yield_under_i(self) + VALUE self; +{ + return rb_yield_0(self, self, ruby_class, YIELD_PUBLIC_DEF, Qfalse); +} + +/* block eval under the class/module context */ +static VALUE +yield_under(under, self) + VALUE under, self; +{ + return exec_under(yield_under_i, under, 0, self); +} + +static VALUE +specific_eval(argc, argv, klass, self) + int argc; + VALUE *argv; + VALUE klass, self; +{ + if (rb_block_given_p()) { + if (argc > 0) { + rb_raise(rb_eArgError, "wrong number of arguments (%d for 0)", argc); + } + return yield_under(klass, self); + } + else { + char *file = "(eval)"; + int line = 1; + + if (argc == 0) { + rb_raise(rb_eArgError, "block not supplied"); + } + else { + if (ruby_safe_level >= 4) { + StringValue(argv[0]); + } + else { + SafeStringValue(argv[0]); + } + if (argc > 3) { + rb_raise(rb_eArgError, "wrong number of arguments: %s(src) or %s{..}", + rb_id2name(ruby_frame->callee), + rb_id2name(ruby_frame->callee)); + } + if (argc > 2) line = NUM2INT(argv[2]); + if (argc > 1) { + file = StringValuePtr(argv[1]); + } + } + return eval_under(klass, self, argv[0], file, line); + } +} + +/* + * call-seq: + * obj.instance_eval(string [, filename [, lineno]] ) => obj + * obj.instance_eval {| | block } => obj + * + * Evaluates a string containing Ruby source code, or the given block, + * within the context of the receiver (_obj_). In order to set the + * context, the variable +self+ is set to _obj_ while + * the code is executing, giving the code access to _obj_'s + * instance variables. 
In the version of <code>instance_eval</code> + * that takes a +String+, the optional second and third + * parameters supply a filename and starting line number that are used + * when reporting compilation errors. + * + * class Klass + * def initialize + * @secret = 99 + * end + * end + * k = Klass.new + * k.instance_eval { @secret } #=> 99 + */ + +VALUE +rb_obj_instance_eval(argc, argv, self) + int argc; + VALUE *argv; + VALUE self; +{ + VALUE klass; + + if (FIXNUM_P(self) || SYMBOL_P(self)) { + klass = Qnil; + } + else { + klass = rb_singleton_class(self); + } + return specific_eval(argc, argv, klass, self); +} + +/* + * call-seq: + * mod.class_eval(string [, filename [, lineno]]) => obj + * mod.module_eval {|| block } => obj + * + * Evaluates the string or block in the context of _mod_. This can + * be used to add methods to a class. <code>module_eval</code> returns + * the result of evaluating its argument. The optional _filename_ + * and _lineno_ parameters set the text for error messages. + * + * class Thing + * end + * a = %q{def hello() "Hello there!" end} + * Thing.module_eval(a) + * puts Thing.new.hello() + * Thing.module_eval("invalid code", "dummy", 123) + * + * <em>produces:</em> + * + * Hello there! 
+ * dummy:123:in `module_eval': undefined local variable + * or method `code' for Thing:Class + */ + +VALUE +rb_mod_module_eval(argc, argv, mod) + int argc; + VALUE *argv; + VALUE mod; +{ + return specific_eval(argc, argv, mod, mod); +} + +VALUE rb_load_path; + +NORETURN(static void load_failed _((VALUE))); + +void +rb_load(fname, wrap) + VALUE fname; + int wrap; +{ + VALUE tmp; + int state; + volatile int prohibit_int = rb_prohibit_interrupt; + volatile ID callee, this_func; + volatile VALUE wrapper = ruby_wrapper; + volatile VALUE self = ruby_top_self; + NODE * volatile last_node; + NODE *saved_cref = ruby_cref; + TMP_PROTECT; + + if (!wrap) rb_secure(4); + FilePathValue(fname); + fname = rb_str_new4(fname); + tmp = rb_find_file(fname); + if (!tmp) { + load_failed(fname); + } + fname = tmp; + + ruby_errinfo = Qnil; /* ensure */ + PUSH_VARS(); + PUSH_CLASS(ruby_wrapper); + ruby_cref = top_cref; + if (!wrap) { + rb_secure(4); /* should alter global state */ + ruby_class = rb_cObject; + ruby_wrapper = 0; + } + else { + /* load in anonymous module as toplevel */ + ruby_class = ruby_wrapper = rb_module_new(); + self = rb_obj_clone(ruby_top_self); + rb_extend_object(self, ruby_wrapper); + PUSH_CREF(ruby_wrapper); + } + PUSH_ITER(ITER_NOT); + PUSH_FRAME(); + ruby_frame->callee = 0; + ruby_frame->this_func = 0; + ruby_frame->this_class = 0; + ruby_frame->self = self; + PUSH_SCOPE(); + /* default visibility is private at loading toplevel */ + SCOPE_SET(SCOPE_PRIVATE); + PUSH_TAG(PROT_NONE); + state = EXEC_TAG(); + callee = ruby_frame->callee; + this_func = ruby_frame->this_func; + last_node = ruby_current_node; + if (!ruby_current_node && ruby_sourcefile) { + last_node = NEW_BEGIN(0); + } + ruby_current_node = 0; + if (state == 0) { + NODE * volatile node; + volatile int critical; + + DEFER_INTS; + ruby_in_eval++; + critical = rb_thread_critical; + rb_thread_critical = Qtrue; + rb_load_file(RSTRING(fname)->ptr); + ruby_in_eval--; + node = ruby_eval_tree; + 
rb_thread_critical = critical; + ALLOW_INTS; + if (ruby_nerrs == 0) { + eval_node(self, node); + } + } + ruby_frame->callee = callee; + ruby_frame->this_func = this_func; + ruby_current_node = last_node; + ruby_sourcefile = 0; + ruby_set_current_source(); + if (ruby_scope->flags == SCOPE_ALLOCA && ruby_class == rb_cObject) { + if (ruby_scope->local_tbl) /* toplevel was empty */ + free(ruby_scope->local_tbl); + } + POP_TAG(); + rb_prohibit_interrupt = prohibit_int; + ruby_cref = saved_cref; + POP_SCOPE(); + POP_FRAME(); + POP_ITER(); + POP_CLASS(); + POP_VARS(); + ruby_wrapper = wrapper; + if (ruby_nerrs > 0) { + ruby_nerrs = 0; + rb_exc_raise(ruby_errinfo); + } + if (state) jump_tag_but_local_jump(state, Qundef); + if (!NIL_P(ruby_errinfo)) /* exception during load */ + rb_exc_raise(ruby_errinfo); +} + +void +rb_load_protect(fname, wrap, state) + VALUE fname; + int wrap; + int *state; +{ + int status; + + PUSH_THREAD_TAG(); + if ((status = EXEC_TAG()) == 0) { + rb_load(fname, wrap); + } + else if (status == TAG_THREAD) { + rb_thread_start_1(); + } + POP_THREAD_TAG(); + if (state) *state = status; +} + +/* + * call-seq: + * load(filename, wrap=false) => true + * + * Loads and executes the Ruby + * program in the file _filename_. If the filename does not + * resolve to an absolute path, the file is searched for in the library + * directories listed in <code>$:</code>. If the optional _wrap_ + * parameter is +true+, the loaded script will be executed + * under an anonymous module, protecting the calling program's global + * namespace. In no circumstance will any local variables in the loaded + * file be propagated to the loading environment. 
+ */ + + +static VALUE +rb_f_load(argc, argv) + int argc; + VALUE *argv; +{ + VALUE fname, wrap; + + rb_scan_args(argc, argv, "11", &fname, &wrap); + rb_load(fname, RTEST(wrap)); + return Qtrue; +} + +VALUE ruby_dln_librefs; +static VALUE rb_features; +static st_table *loading_tbl; + +#define IS_SOEXT(e) (strcmp(e, ".so") == 0 || strcmp(e, ".o") == 0) +#ifdef DLEXT2 +#define IS_DLEXT(e) (strcmp(e, DLEXT) == 0 || strcmp(e, DLEXT2) == 0) +#else +#define IS_DLEXT(e) (strcmp(e, DLEXT) == 0) +#endif + +static char * +rb_feature_p(feature, ext, rb) + const char *feature, *ext; + int rb; +{ + VALUE v; + char *f, *e; + long i, len, elen; + + if (ext) { + len = ext - feature; + elen = strlen(ext); + } + else { + len = strlen(feature); + elen = 0; + } + for (i = 0; i < RARRAY(rb_features)->len; ++i) { + v = RARRAY(rb_features)->ptr[i]; + f = StringValuePtr(v); + if (strncmp(f, feature, len) != 0) continue; + if (!*(e = f + len)) { + if (ext) continue; + return e; + } + if (*e != '.') continue; + if ((!rb || !ext) && (IS_SOEXT(e) || IS_DLEXT(e))) { + return e; + } + if ((rb || !ext) && (strcmp(e, ".rb") == 0)) { + return e; + } + } + return 0; +} + +static const char *const loadable_ext[] = { + ".rb", DLEXT, +#ifdef DLEXT2 + DLEXT2, +#endif + 0 +}; + +static int search_required _((VALUE, VALUE *)); + +int +rb_provided(feature) + const char *feature; +{ + int i; + char *buf; + VALUE fname; + + if (rb_feature_p(feature, 0, Qfalse)) + return Qtrue; + if (loading_tbl) { + if (st_lookup(loading_tbl, (st_data_t)feature, 0)) return Qtrue; + buf = ALLOCA_N(char, strlen(feature)+8); + strcpy(buf, feature); + for (i=0; loadable_ext[i]; i++) { + strcpy(buf+strlen(feature), loadable_ext[i]); + if (st_lookup(loading_tbl, (st_data_t)buf, 0)) return Qtrue; + } + } + if (search_required(rb_str_new2(feature), &fname)) { + feature = RSTRING(fname)->ptr; + if (rb_feature_p(feature, 0, Qfalse)) + return Qtrue; + if (loading_tbl && st_lookup(loading_tbl, (st_data_t)feature, 0)) + return Qtrue; + 
} + return Qfalse; +} + +static void +rb_provide_feature(feature) + VALUE feature; +{ + rb_ary_push(rb_features, feature); +} + +void +rb_provide(feature) + const char *feature; +{ + rb_provide_feature(rb_str_new2(feature)); +} + +static int +load_wait(ftptr) + char *ftptr; +{ + st_data_t th; + + if (!loading_tbl) return Qfalse; + if (!st_lookup(loading_tbl, (st_data_t)ftptr, &th)) return Qfalse; + if ((rb_thread_t)th == curr_thread) return Qtrue; + do { + CHECK_INTS; + rb_thread_schedule(); + } while (st_lookup(loading_tbl, (st_data_t)ftptr, &th)); + return Qtrue; +} + +/* + * call-seq: + * require(string) => true or false + * + * Ruby tries to load the library named _string_, returning + * +true+ if successful. If the filename does not resolve to + * an absolute path, it will be searched for in the directories listed + * in <code>$:</code>. If the file has the extension ``.rb'', it is + * loaded as a source file; if the extension is ``.so'', ``.o'', or + * ``.dll'', or whatever the default shared library extension is on + * the current platform, Ruby loads the shared library as a Ruby + * extension. Otherwise, Ruby tries adding ``.rb'', ``.so'', and so on + * to the name. The name of the loaded feature is added to the array in + * <code>$"</code>. A feature will not be loaded if it's name already + * appears in <code>$"</code>. However, the file name is not converted + * to an absolute path, so that ``<code>require 'a';require + * './a'</code>'' will load <code>a.rb</code> twice. 
+ * + * require "my-library.rb" + * require "db-driver" + */ + +VALUE +rb_f_require(obj, fname) + VALUE obj, fname; +{ + return rb_require_safe(fname, ruby_safe_level); +} + +static int +search_required(fname, path) + VALUE fname, *path; +{ + VALUE tmp; + char *ext, *ftptr; + int type; + + *path = 0; + ext = strrchr(ftptr = RSTRING(fname)->ptr, '.'); + if (ext && !strchr(ext, '/')) { + if (strcmp(".rb", ext) == 0) { + if (rb_feature_p(ftptr, ext, Qtrue)) return 'r'; + if (tmp = rb_find_file(fname)) { + tmp = rb_file_expand_path(tmp, Qnil); + ext = strrchr(ftptr = RSTRING(tmp)->ptr, '.'); + if (!rb_feature_p(ftptr, ext, Qtrue)) + *path = tmp; + return 'r'; + } + return 0; + } + else if (IS_SOEXT(ext)) { + if (rb_feature_p(ftptr, ext, Qfalse)) return 's'; + tmp = rb_str_new(RSTRING(fname)->ptr, ext-RSTRING(fname)->ptr); +#ifdef DLEXT2 + OBJ_FREEZE(tmp); + if (rb_find_file_ext(&tmp, loadable_ext+1)) { + tmp = rb_file_expand_path(tmp, Qnil); + ext = strrchr(ftptr = RSTRING(tmp)->ptr, '.'); + if (!rb_feature_p(ftptr, ext, Qfalse)) + *path = tmp; + return 's'; + } +#else + rb_str_cat2(tmp, DLEXT); + OBJ_FREEZE(tmp); + if (tmp = rb_find_file(tmp)) { + tmp = rb_file_expand_path(tmp, Qnil); + ext = strrchr(ftptr = RSTRING(tmp)->ptr, '.'); + if (!rb_feature_p(ftptr, ext, Qfalse)) + *path = tmp; + return 's'; + } +#endif + } + else if (IS_DLEXT(ext)) { + if (rb_feature_p(ftptr, ext, Qfalse)) return 's'; + if (tmp = rb_find_file(fname)) { + tmp = rb_file_expand_path(tmp, Qnil); + ext = strrchr(ftptr = RSTRING(tmp)->ptr, '.'); + if (!rb_feature_p(ftptr, ext, Qfalse)) + *path = tmp; + return 's'; + } + } + } + else if (ext = rb_feature_p(ftptr, 0, Qfalse)) { + return (*ext && (IS_SOEXT(ext) || IS_DLEXT(ext))) ? 
's' : 'r'; + } + tmp = fname; + type = rb_find_file_ext(&tmp, loadable_ext); + tmp = rb_file_expand_path(tmp, Qnil); + switch (type) { + case 0: + ftptr = RSTRING(tmp)->ptr; + if ((ext = rb_feature_p(ftptr, 0, Qfalse))) { + type = strcmp(".rb", ext); + break; + } + return 0; + + default: + ext = strrchr(ftptr = RSTRING(tmp)->ptr, '.'); + if (rb_feature_p(ftptr, ext, !--type)) break; + *path = tmp; + } + return type ? 's' : 'r'; +} + +static void +load_failed(fname) + VALUE fname; +{ + rb_raise(rb_eLoadError, "no such file to load -- %s", RSTRING(fname)->ptr); +} + +VALUE +rb_require_safe(fname, safe) + VALUE fname; + int safe; +{ + VALUE result = Qnil; + volatile VALUE errinfo = ruby_errinfo; + int state; + struct { + NODE *node; + ID this_func, callee; + int vmode, safe; + } volatile saved; + char *volatile ftptr = 0; + + saved.vmode = scope_vmode; + saved.node = ruby_current_node; + saved.callee = ruby_frame->callee; + saved.this_func = ruby_frame->this_func; + saved.safe = ruby_safe_level; + PUSH_TAG(PROT_NONE); + if ((state = EXEC_TAG()) == 0) { + VALUE path; + long handle; + int found; + + ruby_safe_level = safe; + FilePathValue(fname); + *(volatile VALUE *)&fname = rb_str_new4(fname); + found = search_required(fname, &path); + if (found) { + if (!path || load_wait(RSTRING(path)->ptr)) { + result = Qfalse; + } + else { + ruby_safe_level = 0; + switch (found) { + case 'r': + /* loading ruby library should be serialized. 
*/ + if (!loading_tbl) { + loading_tbl = st_init_strtable(); + } + /* partial state */ + ftptr = ruby_strdup(RSTRING(path)->ptr); + st_insert(loading_tbl, (st_data_t)ftptr, (st_data_t)curr_thread); + rb_load(path, 0); + break; + + case 's': + ruby_current_node = 0; + ruby_sourcefile = rb_source_filename(RSTRING(path)->ptr); + ruby_sourceline = 0; + ruby_frame->callee = 0; + ruby_frame->this_func = 0; + SCOPE_SET(SCOPE_PUBLIC); + handle = (long)dln_load(RSTRING(path)->ptr); + rb_ary_push(ruby_dln_librefs, LONG2NUM(handle)); + break; + } + rb_provide_feature(path); + result = Qtrue; + } + } + } + POP_TAG(); + ruby_current_node = saved.node; + ruby_set_current_source(); + ruby_frame->this_func = saved.this_func; + ruby_frame->callee = saved.callee; + SCOPE_SET(saved.vmode); + ruby_safe_level = saved.safe; + if (ftptr) { + if (st_delete(loading_tbl, (st_data_t *)&ftptr, 0)) { /* loading done */ + free(ftptr); + } + } + if (state) JUMP_TAG(state); + if (NIL_P(result)) { + load_failed(fname); + } + ruby_errinfo = errinfo; + + return result; +} + +VALUE +rb_require(fname) + const char *fname; +{ + VALUE fn = rb_str_new2(fname); + OBJ_FREEZE(fn); + return rb_require_safe(fn, ruby_safe_level); +} + +static void +secure_visibility(self) + VALUE self; +{ + if (ruby_safe_level >= 4 && !OBJ_TAINTED(self)) { + rb_raise(rb_eSecurityError, "Insecure: can't change method visibility"); + } +} + +static void +set_method_visibility(self, argc, argv, ex) + VALUE self; + int argc; + VALUE *argv; + ID ex; +{ + int i; + + secure_visibility(self); + for (i=0; i<argc; i++) { + rb_export_method(self, rb_to_id(argv[i]), ex); + } + rb_clear_cache_by_class(self); +} + +/* + * call-seq: + * public => self + * public(symbol, ...) => self + * + * With no arguments, sets the default visibility for subsequently + * defined methods to public. With arguments, sets the named methods to + * have public visibility. 
+ */ + +static VALUE +rb_mod_public(argc, argv, module) + int argc; + VALUE *argv; + VALUE module; +{ + secure_visibility(module); + if (argc == 0) { + SCOPE_SET(SCOPE_PUBLIC); + } + else { + set_method_visibility(module, argc, argv, NOEX_PUBLIC); + } + return module; +} + +/* + * call-seq: + * protected => self + * protected(symbol, ...) => self + * + * With no arguments, sets the default visibility for subsequently + * defined methods to protected. With arguments, sets the named methods + * to have protected visibility. + */ + +static VALUE +rb_mod_protected(argc, argv, module) + int argc; + VALUE *argv; + VALUE module; +{ + secure_visibility(module); + if (argc == 0) { + SCOPE_SET(SCOPE_PROTECTED); + } + else { + set_method_visibility(module, argc, argv, NOEX_PROTECTED); + } + return module; +} + +/* + * call-seq: + * private => self + * private(symbol, ...) => self + * + * With no arguments, sets the default visibility for subsequently + * defined methods to private. With arguments, sets the named methods + * to have private visibility. + * + * module Mod + * def a() end + * def b() end + * private + * def c() end + * private :a + * end + * Mod.private_instance_methods #=> ["a", "c"] + */ + +static VALUE +rb_mod_private(argc, argv, module) + int argc; + VALUE *argv; + VALUE module; +{ + secure_visibility(module); + if (argc == 0) { + SCOPE_SET(SCOPE_PRIVATE); + } + else { + set_method_visibility(module, argc, argv, NOEX_PRIVATE); + } + return module; +} + +/* + * call-seq: + * mod.public_class_method(symbol, ...) => mod + * + * Makes a list of existing class methods public. + */ + +static VALUE +rb_mod_public_method(argc, argv, obj) + int argc; + VALUE *argv; + VALUE obj; +{ + set_method_visibility(CLASS_OF(obj), argc, argv, NOEX_PUBLIC); + return obj; +} + +/* + * call-seq: + * mod.private_class_method(symbol, ...) => mod + * + * Makes existing class methods private. Often used to hide the default + * constructor <code>new</code>. 
+ * + * class SimpleSingleton # Not thread safe + * private_class_method :new + * def SimpleSingleton.create(*args, &block) + * @me = new(*args, &block) if ! @me + * @me + * end + * end + */ + +static VALUE +rb_mod_private_method(argc, argv, obj) + int argc; + VALUE *argv; + VALUE obj; +{ + set_method_visibility(CLASS_OF(obj), argc, argv, NOEX_PRIVATE); + return obj; +} + +/* + * call-seq: + * public + * public(symbol, ...) + * + * With no arguments, sets the default visibility for subsequently + * defined methods to public. With arguments, sets the named methods to + * have public visibility. + */ + +static VALUE +top_public(argc, argv) + int argc; + VALUE *argv; +{ + return rb_mod_public(argc, argv, rb_cObject); +} + +static VALUE +top_private(argc, argv) + int argc; + VALUE *argv; +{ + return rb_mod_private(argc, argv, rb_cObject); +} + +/* + * call-seq: + * module_function(symbol, ...) => self + * + * Creates module functions for the named methods. These functions may + * be called with the module as a receiver, and also become available + * as instance methods to classes that mix in the module. Module + * functions are copies of the original, and so may be changed + * independently. The instance-method versions are made private. If + * used with no arguments, subsequently defined methods become module + * functions. 
+ * + * module Mod + * def one + * "This is one" + * end + * module_function :one + * end + * class Cls + * include Mod + * def callOne + * one + * end + * end + * Mod.one #=> "This is one" + * c = Cls.new + * c.callOne #=> "This is one" + * module Mod + * def one + * "This is the new one" + * end + * end + * Mod.one #=> "This is one" + * c.callOne #=> "This is the new one" + */ + +static VALUE +rb_mod_modfunc(argc, argv, module) + int argc; + VALUE *argv; + VALUE module; +{ + int i; + ID id; + NODE *body; + + if (TYPE(module) != T_MODULE) { + rb_raise(rb_eTypeError, "module_function must be called for modules"); + } + + secure_visibility(module); + if (argc == 0) { + SCOPE_SET(SCOPE_MODFUNC); + return module; + } + + set_method_visibility(module, argc, argv, NOEX_PRIVATE); + for (i=0; i<argc; i++) { + VALUE m = module; + + id = rb_to_id(argv[i]); + for (;;) { + body = search_method(m, id, &m); + if (body == 0) { + body = search_method(rb_cObject, id, &m); + } + if (body == 0 || body->nd_body == 0) { + rb_bug("undefined method `%s'; can't happen", rb_id2name(id)); + } + if (nd_type(body->nd_body) != NODE_ZSUPER) { + break; /* normal case: need not to follow 'super' link */ + } + m = RCLASS(m)->super; + if (!m) break; + } + rb_add_method(rb_singleton_class(module), id, body->nd_body, NOEX_PUBLIC); + } + return module; +} + +/* + * call-seq: + * append_features(mod) => mod + * + * When this module is included in another, Ruby calls + * <code>append_features</code> in this module, passing it the + * receiving module in _mod_. Ruby's default implementation is + * to add the constants, methods, and module variables of this module + * to _mod_ if this module has not already been added to + * _mod_ or one of its ancestors. See also <code>Module#include</code>. 
+ */ + +static VALUE +rb_mod_append_features(module, include) + VALUE module, include; +{ + switch (TYPE(include)) { + case T_CLASS: + case T_MODULE: + break; + default: + Check_Type(include, T_CLASS); + break; + } + rb_include_module(include, module); + + return module; +} + +/* + * call-seq: + * include(module, ...) => self + * + * Invokes <code>Module.append_features</code> on each parameter in turn. + */ + +static VALUE +rb_mod_include(argc, argv, module) + int argc; + VALUE *argv; + VALUE module; +{ + int i; + + for (i=0; i<argc; i++) Check_Type(argv[i], T_MODULE); + while (argc--) { + rb_funcall(argv[argc], rb_intern("append_features"), 1, module); + rb_funcall(argv[argc], rb_intern("included"), 1, module); + } + return module; +} + +void +rb_obj_call_init(obj, argc, argv) + VALUE obj; + int argc; + VALUE *argv; +{ + PUSH_ITER(rb_block_given_p()?ITER_PRE:ITER_NOT); + rb_funcall2(obj, init, argc, argv); + POP_ITER(); +} + +void +rb_extend_object(obj, module) + VALUE obj, module; +{ + rb_include_module(rb_singleton_class(obj), module); +} + +/* + * call-seq: + * extend_object(obj) => obj + * + * Extends the specified object by adding this module's constants and + * methods (which are added as singleton methods). This is the callback + * method used by <code>Object#extend</code>. + * + * module Picky + * def Picky.extend_object(o) + * if String === o + * puts "Can't add Picky to a String" + * else + * puts "Picky added to #{o.class}" + * super + * end + * end + * end + * (s = Array.new).extend Picky # Call Object.extend + * (s = "quick brown fox").extend Picky + * + * <em>produces:</em> + * + * Picky added to Array + * Can't add Picky to a String + */ + +static VALUE +rb_mod_extend_object(mod, obj) + VALUE mod, obj; +{ + rb_extend_object(obj, mod); + return obj; +} + +/* + * call-seq: + * obj.extend(module, ...) => obj + * + * Adds to _obj_ the instance methods from each module given as a + * parameter. 
+ * + * module Mod + * def hello + * "Hello from Mod.\n" + * end + * end + * + * class Klass + * def hello + * "Hello from Klass.\n" + * end + * end + * + * k = Klass.new + * k.hello #=> "Hello from Klass.\n" + * k.extend(Mod) #=> #<Klass:0x401b3bc8> + * k.hello #=> "Hello from Mod.\n" + */ + +static VALUE +rb_obj_extend(argc, argv, obj) + int argc; + VALUE *argv; + VALUE obj; +{ + int i; + + if (argc == 0) { + rb_raise(rb_eArgError, "wrong number of arguments (0 for 1)"); + } + for (i=0; i<argc; i++) Check_Type(argv[i], T_MODULE); + while (argc--) { + rb_funcall(argv[argc], rb_intern("extend_object"), 1, obj); + rb_funcall(argv[argc], rb_intern("extended"), 1, obj); + } + return obj; +} + +/* + * call-seq: + * include(module, ...) => self + * + * Invokes <code>Module.append_features</code> + * on each parameter in turn. Effectively adds the methods and constants + * in each module to the receiver. + */ + +static VALUE +top_include(argc, argv, self) + int argc; + VALUE *argv; + VALUE self; +{ + rb_secure(4); + if (ruby_wrapper) { + rb_warning("main#include in the wrapped load is effective only in wrapper module"); + return rb_mod_include(argc, argv, ruby_wrapper); + } + return rb_mod_include(argc, argv, rb_cObject); +} + +VALUE rb_f_trace_var(); +VALUE rb_f_untrace_var(); + +static void +errinfo_setter(val, id, var) + VALUE val; + ID id; + VALUE *var; +{ + if (!NIL_P(val) && !rb_obj_is_kind_of(val, rb_eException)) { + rb_raise(rb_eTypeError, "assigning non-exception to $!"); + } + *var = val; +} + +static VALUE +errat_getter(id) + ID id; +{ + return get_backtrace(ruby_errinfo); +} + +static void +errat_setter(val, id, var) + VALUE val; + ID id; + VALUE *var; +{ + if (NIL_P(ruby_errinfo)) { + rb_raise(rb_eArgError, "$! not set"); + } + set_backtrace(ruby_errinfo, val); +} + +/* + * call-seq: + * local_variables => array + * + * Returns the names of the current local variables. + * + * fred = 1 + * for i in 1..10 + * # ... 
+ * end + * local_variables #=> ["fred", "i"] + */ + +static VALUE +rb_f_local_variables() +{ + ID *tbl; + int n, i; + VALUE ary = rb_ary_new(); + struct RVarmap *vars; + + tbl = ruby_scope->local_tbl; + if (tbl) { + n = *tbl++; + for (i=2; i<n; i++) { /* skip first 2 ($_ and $~) */ + if (!rb_is_local_id(tbl[i])) continue; /* skip flip states */ + rb_ary_push(ary, rb_str_new2(rb_id2name(tbl[i]))); + } + } + + vars = ruby_dyna_vars; + while (vars) { + if (vars->id && rb_is_local_id(vars->id)) { /* skip $_, $~ and flip states */ + rb_ary_push(ary, rb_str_new2(rb_id2name(vars->id))); + } + vars = vars->next; + } + + return ary; +} + +static VALUE rb_f_catch _((VALUE,VALUE)); +NORETURN(static VALUE rb_f_throw _((int,VALUE*))); + +struct end_proc_data { + void (*func)(); + VALUE data; + int safe; + struct end_proc_data *next; +}; + +static struct end_proc_data *end_procs, *ephemeral_end_procs, *tmp_end_procs; + +void +rb_set_end_proc(func, data) + void (*func) _((VALUE)); + VALUE data; +{ + struct end_proc_data *link = ALLOC(struct end_proc_data); + struct end_proc_data **list; + + if (ruby_wrapper) list = &ephemeral_end_procs; + else list = &end_procs; + link->next = *list; + link->func = func; + link->data = data; + link->safe = ruby_safe_level; + *list = link; +} + +void +rb_mark_end_proc() +{ + struct end_proc_data *link; + + link = end_procs; + while (link) { + rb_gc_mark(link->data); + link = link->next; + } + link = ephemeral_end_procs; + while (link) { + rb_gc_mark(link->data); + link = link->next; + } + link = tmp_end_procs; + while (link) { + rb_gc_mark(link->data); + link = link->next; + } +} + +static void call_end_proc _((VALUE data)); + +static void +call_end_proc(data) + VALUE data; +{ + PUSH_ITER(ITER_NOT); + PUSH_FRAME(); + ruby_frame->self = ruby_frame->prev->self; + ruby_frame->node = 0; + ruby_frame->callee = 0; + ruby_frame->this_func = 0; + ruby_frame->this_class = 0; + proc_invoke(data, rb_ary_new2(0), Qundef, 0); + POP_FRAME(); + POP_ITER(); +} + 
+static void +rb_f_END() +{ + PUSH_FRAME(); + ruby_frame->argc = 0; + ruby_frame->iter = ITER_CUR; + rb_set_end_proc(call_end_proc, rb_block_proc()); + POP_FRAME(); +} + +/* + * call-seq: + * at_exit { block } -> proc + * + * Converts _block_ to a +Proc+ object (and therefore + * binds it at the point of call) and registers it for execution when + * the program exits. If multiple handlers are registered, they are + * executed in reverse order of registration. + * + * def do_at_exit(str1) + * at_exit { print str1 } + * end + * at_exit { puts "cruel world" } + * do_at_exit("goodbye ") + * exit + * + * <em>produces:</em> + * + * goodbye cruel world + */ + +static VALUE +rb_f_at_exit() +{ + VALUE proc; + + if (!rb_block_given_p()) { + rb_raise(rb_eArgError, "called without a block"); + } + proc = rb_block_proc(); + rb_set_end_proc(call_end_proc, proc); + return proc; +} + +void +rb_exec_end_proc() +{ + struct end_proc_data *link, *tmp; + int status; + volatile int safe = ruby_safe_level; + + while (ephemeral_end_procs) { + tmp_end_procs = link = ephemeral_end_procs; + ephemeral_end_procs = 0; + while (link) { + PUSH_TAG(PROT_NONE); + if ((status = EXEC_TAG()) == 0) { + ruby_safe_level = link->safe; + (*link->func)(link->data); + } + POP_TAG(); + if (status) { + error_handle(status); + } + tmp = link; + tmp_end_procs = link = link->next; + free(tmp); + } + } + while (end_procs) { + tmp_end_procs = link = end_procs; + end_procs = 0; + while (link) { + PUSH_TAG(PROT_NONE); + if ((status = EXEC_TAG()) == 0) { + ruby_safe_level = link->safe; + (*link->func)(link->data); + } + POP_TAG(); + if (status) { + error_handle(status); + } + tmp = link; + tmp_end_procs = link = link->next; + free(tmp); + } + } + ruby_safe_level = safe; +} + +void +Init_eval() +{ + init = rb_intern("initialize"); + eqq = rb_intern("==="); + each = rb_intern("each"); + + aref = rb_intern("[]"); + aset = rb_intern("[]="); + match = rb_intern("=~"); + missing = rb_intern("method_missing"); + added = 
rb_intern("method_added"); + singleton_added = rb_intern("singleton_method_added"); + removed = rb_intern("method_removed"); + singleton_removed = rb_intern("singleton_method_removed"); + undefined = rb_intern("method_undefined"); + singleton_undefined = rb_intern("singleton_method_undefined"); + + __id__ = rb_intern("__id__"); + __send__ = rb_intern("__send__"); + + rb_global_variable((VALUE*)&top_scope); + rb_global_variable((VALUE*)&ruby_eval_tree); + rb_global_variable((VALUE*)&ruby_dyna_vars); + + rb_define_virtual_variable("$@", errat_getter, errat_setter); + rb_define_hooked_variable("$!", &ruby_errinfo, 0, errinfo_setter); + + rb_define_global_function("eval", rb_f_eval, -1); + rb_define_global_function("iterator?", rb_f_block_given_p, 0); + rb_define_global_function("block_given?", rb_f_block_given_p, 0); + rb_define_global_function("method_missing", rb_method_missing, -1); + rb_define_global_function("loop", rb_f_loop, 0); + + rb_define_method(rb_mKernel, "respond_to?", rb_obj_respond_to, -1); + respond_to = rb_intern("respond_to?"); + basic_respond_to = rb_method_node(rb_cObject, respond_to); + rb_global_variable((VALUE*)&basic_respond_to); + + rb_define_global_function("raise", rb_f_raise, -1); + rb_define_global_function("fail", rb_f_raise, -1); + + rb_define_global_function("caller", rb_f_caller, -1); + + rb_define_global_function("exit", rb_f_exit, -1); + rb_define_global_function("abort", rb_f_abort, -1); + + rb_define_global_function("at_exit", rb_f_at_exit, 0); + + rb_define_global_function("catch", rb_f_catch, 1); + rb_define_global_function("throw", rb_f_throw, -1); + rb_define_global_function("global_variables", rb_f_global_variables, 0); /* in variable.c */ + rb_define_global_function("local_variables", rb_f_local_variables, 0); + + rb_define_method(rb_mKernel, "send", rb_f_send, -1); + rb_define_method(rb_mKernel, "__send__", rb_f_send, -1); + rb_define_method(rb_mKernel, "instance_eval", rb_obj_instance_eval, -1); + + 
rb_define_private_method(rb_cModule, "append_features", rb_mod_append_features, 1); + rb_define_private_method(rb_cModule, "extend_object", rb_mod_extend_object, 1); + rb_define_private_method(rb_cModule, "include", rb_mod_include, -1); + rb_define_private_method(rb_cModule, "public", rb_mod_public, -1); + rb_define_private_method(rb_cModule, "protected", rb_mod_protected, -1); + rb_define_private_method(rb_cModule, "private", rb_mod_private, -1); + rb_define_private_method(rb_cModule, "module_function", rb_mod_modfunc, -1); + rb_define_method(rb_cModule, "method_defined?", rb_mod_method_defined, 1); + rb_define_method(rb_cModule, "public_method_defined?", rb_mod_public_method_defined, 1); + rb_define_method(rb_cModule, "private_method_defined?", rb_mod_private_method_defined, 1); + rb_define_method(rb_cModule, "protected_method_defined?", rb_mod_protected_method_defined, 1); + rb_define_method(rb_cModule, "public_class_method", rb_mod_public_method, -1); + rb_define_method(rb_cModule, "private_class_method", rb_mod_private_method, -1); + rb_define_method(rb_cModule, "module_eval", rb_mod_module_eval, -1); + rb_define_method(rb_cModule, "class_eval", rb_mod_module_eval, -1); + + rb_undef_method(rb_cClass, "module_function"); + + rb_define_private_method(rb_cModule, "remove_method", rb_mod_remove_method, -1); + rb_define_private_method(rb_cModule, "undef_method", rb_mod_undef_method, -1); + rb_define_private_method(rb_cModule, "alias_method", rb_mod_alias_method, 2); + rb_define_private_method(rb_cModule, "define_method", rb_mod_define_method, -1); + + rb_define_singleton_method(rb_cModule, "nesting", rb_mod_nesting, 0); + rb_define_singleton_method(rb_cModule, "constants", rb_mod_s_constants, 0); + + rb_define_singleton_method(ruby_top_self, "include", top_include, -1); + rb_define_singleton_method(ruby_top_self, "public", top_public, -1); + rb_define_singleton_method(ruby_top_self, "private", top_private, -1); + + rb_define_method(rb_mKernel, "extend", 
rb_obj_extend, -1); + + rb_define_global_function("trace_var", rb_f_trace_var, -1); /* in variable.c */ + rb_define_global_function("untrace_var", rb_f_untrace_var, -1); /* in variable.c */ + + rb_define_global_function("set_trace_func", set_trace_func, 1); + rb_global_variable(&trace_func); + + rb_define_virtual_variable("$SAFE", safe_getter, safe_setter); +} + +/* + * call-seq: + * mod.autoload(name, filename) => nil + * + * Registers _filename_ to be loaded (using <code>Kernel::require</code>) + * the first time that _module_ (which may be a <code>String</code> or + * a symbol) is accessed in the namespace of _mod_. + * + * module A + * end + * A.autoload(:B, "b") + * A::B.doit # autoloads "b" + */ + +static VALUE +rb_mod_autoload(mod, sym, file) + VALUE mod; + VALUE sym; + VALUE file; +{ + ID id = rb_to_id(sym); + + Check_SafeStr(file); + rb_autoload(mod, id, RSTRING(file)->ptr); + return Qnil; +} + +/* + * MISSING: documentation + */ + +static VALUE +rb_mod_autoload_p(mod, sym) + VALUE mod, sym; +{ + return rb_autoload_p(mod, rb_to_id(sym)); +} + +/* + * call-seq: + * autoload(module, filename) => nil + * + * Registers _filename_ to be loaded (using <code>Kernel::require</code>) + * the first time that _module_ (which may be a <code>String</code> or + * a symbol) is accessed. + * + * autoload(:MyModule, "/usr/local/lib/modules/my_module.rb") + */ + +static VALUE +rb_f_autoload(obj, sym, file) + VALUE obj; + VALUE sym; + VALUE file; +{ + return rb_mod_autoload(ruby_cbase, sym, file); +} + + +/* + * MISSING: documentation + */ + +static VALUE +rb_f_autoload_p(obj, sym) + VALUE obj; + VALUE sym; +{ + /* use ruby_cbase as same as rb_f_autoload. 
*/ + return rb_mod_autoload_p(ruby_cbase, sym); +} + +void +Init_load() +{ + rb_load_path = rb_ary_new(); + rb_define_readonly_variable("$:", &rb_load_path); + rb_define_readonly_variable("$-I", &rb_load_path); + rb_define_readonly_variable("$LOAD_PATH", &rb_load_path); + + rb_features = rb_ary_new(); + rb_define_readonly_variable("$\"", &rb_features); + rb_define_readonly_variable("$LOADED_FEATURES", &rb_features); + + rb_define_global_function("load", rb_f_load, -1); + rb_define_global_function("require", rb_f_require, 1); + rb_define_method(rb_cModule, "autoload", rb_mod_autoload, 2); + rb_define_method(rb_cModule, "autoload?", rb_mod_autoload_p, 1); + rb_define_global_function("autoload", rb_f_autoload, 2); + rb_define_global_function("autoload?", rb_f_autoload_p, 1); + rb_global_variable(&ruby_wrapper); + + ruby_dln_librefs = rb_ary_new(); + rb_global_variable(&ruby_dln_librefs); +} + +static void +scope_dup(scope) + struct SCOPE *scope; +{ + volatile ID *tbl; + VALUE *vars; + + scope->flags |= SCOPE_DONT_RECYCLE; + if (scope->flags & SCOPE_MALLOC) return; + + if (scope->local_tbl) { + tbl = scope->local_tbl; + vars = ALLOC_N(VALUE, tbl[0]+1); + *vars++ = scope->local_vars[-1]; + MEMCPY(vars, scope->local_vars, VALUE, tbl[0]); + scope->local_vars = vars; + scope->flags |= SCOPE_MALLOC; + } +} + +static void +blk_mark(data) + struct BLOCK *data; +{ + while (data) { + rb_gc_mark_frame(&data->frame); + rb_gc_mark((VALUE)data->scope); + rb_gc_mark((VALUE)data->var); + rb_gc_mark((VALUE)data->body); + rb_gc_mark((VALUE)data->self); + rb_gc_mark((VALUE)data->dyna_vars); + rb_gc_mark((VALUE)data->cref); + rb_gc_mark(data->wrapper); + rb_gc_mark(data->block_obj); + data = data->prev; + } +} + +static void +frame_free(frame) + struct FRAME *frame; +{ + struct FRAME *tmp; + + frame = frame->prev; + while (frame) { + tmp = frame; + frame = frame->prev; + free(tmp); + } +} + +static void +blk_free(data) + struct BLOCK *data; +{ + void *tmp; + + while (data) { + 
frame_free(&data->frame); + tmp = data; + data = data->prev; + free(tmp); + } +} + +static void +frame_dup(frame) + struct FRAME *frame; +{ + struct FRAME *tmp; + + for (;;) { + frame->tmp = 0; /* should not preserve tmp */ + if (!frame->prev) break; + tmp = ALLOC(struct FRAME); + *tmp = *frame->prev; + frame->prev = tmp; + frame = tmp; + } +} + +static void +blk_copy_prev(block) + struct BLOCK *block; +{ + struct BLOCK *tmp; + struct RVarmap* vars; + + while (block->prev) { + tmp = ALLOC_N(struct BLOCK, 1); + MEMCPY(tmp, block->prev, struct BLOCK, 1); + scope_dup(tmp->scope); + frame_dup(&tmp->frame); + + for (vars = tmp->dyna_vars; vars; vars = vars->next) { + if (FL_TEST(vars, DVAR_DONT_RECYCLE)) break; + FL_SET(vars, DVAR_DONT_RECYCLE); + } + + block->prev = tmp; + block = tmp; + } +} + + +static void +blk_dup(dup, orig) + struct BLOCK *dup, *orig; +{ + MEMCPY(dup, orig, struct BLOCK, 1); + frame_dup(&dup->frame); + + if (dup->iter) { + blk_copy_prev(dup); + } + else { + dup->prev = 0; + } +} + +/* + * MISSING: documentation + */ + +static VALUE +proc_clone(self) + VALUE self; +{ + struct BLOCK *orig, *data; + VALUE bind; + + Data_Get_Struct(self, struct BLOCK, orig); + bind = Data_Make_Struct(rb_obj_class(self),struct BLOCK,blk_mark,blk_free,data); + CLONESETUP(bind, self); + blk_dup(data, orig); + + return bind; +} + +/* + * MISSING: documentation + */ + +static VALUE +proc_dup(self) + VALUE self; +{ + struct BLOCK *orig, *data; + VALUE bind; + + Data_Get_Struct(self, struct BLOCK, orig); + bind = Data_Make_Struct(rb_obj_class(self),struct BLOCK,blk_mark,blk_free,data); + blk_dup(data, orig); + + return bind; +} + +/* + * call-seq: + * binding -> a_binding + * + * Returns a +Binding+ object, describing the variable and + * method bindings at the point of call. This object can be used when + * calling +eval+ to execute the evaluated command in this + * environment. Also see the description of class +Binding+. 
+ * + * def getBinding(param) + * return binding + * end + * b = getBinding("hello") + * eval("param", b) #=> "hello" + */ + +static VALUE +rb_f_binding(self) + VALUE self; +{ + struct BLOCK *data, *p; + struct RVarmap *vars; + VALUE bind; + + PUSH_BLOCK(0,0); + bind = Data_Make_Struct(rb_cBinding,struct BLOCK,blk_mark,blk_free,data); + *data = *ruby_block; + + data->orig_thread = rb_thread_current(); + data->wrapper = ruby_wrapper; + data->iter = rb_f_block_given_p(); + frame_dup(&data->frame); + if (ruby_frame->prev) { + data->frame.callee = ruby_frame->prev->callee; + data->frame.this_func = ruby_frame->prev->this_func; + data->frame.this_class = ruby_frame->prev->this_class; + } + + if (data->iter) { + blk_copy_prev(data); + } + else { + data->prev = 0; + } + + for (p = data; p; p = p->prev) { + for (vars = p->dyna_vars; vars; vars = vars->next) { + if (FL_TEST(vars, DVAR_DONT_RECYCLE)) break; + FL_SET(vars, DVAR_DONT_RECYCLE); + } + } + scope_dup(data->scope); + POP_BLOCK(); + + return bind; +} + +/* + * call-seq: + * binding.eval(string [, filename [,lineno]]) => obj + * + * Evaluates the Ruby expression(s) in <em>string</em>, in the + * <em>binding</em>'s context. If the optional <em>filename</em> and + * <em>lineno</em> parameters are present, they will be used when + * reporting syntax errors. 
+ * + * def getBinding(param) + * return binding + * end + * b = getBinding("hello") + * b.eval("param") #=> "hello" + */ + +static VALUE +bind_eval(argc, argv, bind) + int argc; + VALUE *argv; + VALUE bind; +{ + struct BLOCK *data; + VALUE args[4]; + + rb_scan_args(argc, argv, "12", &args[0], &args[2], &args[3]); + args[1] = bind; + Data_Get_Struct(bind, struct BLOCK, data); + + return rb_f_eval(argc+1, args, data->self); +} + +#define PROC_TSHIFT (FL_USHIFT+1) +#define PROC_TMASK (FL_USER1|FL_USER2|FL_USER3) +#define PROC_TMAX (PROC_TMASK >> PROC_TSHIFT) +#define PROC_NOSAFE FL_USER4 + +#define SAFE_LEVEL_MAX PROC_TMASK + +#define proc_safe_level_p(data) (!(RBASIC(data)->flags & PROC_NOSAFE)) + +static void +proc_save_safe_level(data) + VALUE data; +{ + int safe = ruby_safe_level; + if (safe > PROC_TMAX) safe = PROC_TMAX; + FL_SET(data, (safe << PROC_TSHIFT) & PROC_TMASK); +} + +static int +proc_get_safe_level(data) + VALUE data; +{ + return (RBASIC(data)->flags & PROC_TMASK) >> PROC_TSHIFT; +} + +static void +proc_set_safe_level(data) + VALUE data; +{ + if (!proc_safe_level_p(data)) return; + ruby_safe_level = proc_get_safe_level(data); +} + +static VALUE +proc_alloc(klass, proc) + VALUE klass; + int proc; +{ + volatile VALUE block; + struct BLOCK *data, *p; + struct RVarmap *vars; + + if (!rb_block_given_p() && !rb_f_block_given_p()) { + rb_raise(rb_eArgError, "tried to create Proc object without a block"); + } + if (proc && !rb_block_given_p()) { + rb_warn("tried to create Proc object without a block"); + } + + if (!proc && ruby_block->block_obj) { + VALUE obj = ruby_block->block_obj; + if (CLASS_OF(obj) != klass) { + obj = proc_clone(obj); + RBASIC(obj)->klass = klass; + } + return obj; + } + block = Data_Make_Struct(klass, struct BLOCK, blk_mark, blk_free, data); + *data = *ruby_block; + + data->orig_thread = rb_thread_current(); + data->wrapper = ruby_wrapper; + data->iter = data->prev?Qtrue:Qfalse; + data->block_obj = block; + frame_dup(&data->frame); + if 
(data->iter) { + blk_copy_prev(data); + } + else { + data->prev = 0; + } + + for (p = data; p; p = p->prev) { + for (vars = p->dyna_vars; vars; vars = vars->next) { + if (FL_TEST(vars, DVAR_DONT_RECYCLE)) break; + FL_SET(vars, DVAR_DONT_RECYCLE); + } + } + scope_dup(data->scope); + proc_save_safe_level(block); + if (proc) { + data->flags |= BLOCK_LAMBDA; + } + else { + ruby_block->block_obj = block; + } + + return block; +} + +/* + * call-seq: + * Proc.new {|...| block } => a_proc + * Proc.new => a_proc + * + * Creates a new <code>Proc</code> object, bound to the current + * context. <code>Proc::new</code> may be called without a block only + * within a method with an attached block, in which case that block is + * converted to the <code>Proc</code> object. + * + * def proc_from + * Proc.new + * end + * proc = proc_from { "hello" } + * proc.call #=> "hello" + */ + +static VALUE +proc_s_new(argc, argv, klass) + int argc; + VALUE *argv; + VALUE klass; +{ + VALUE block = proc_alloc(klass, Qfalse); + + rb_obj_call_init(block, argc, argv); + return block; +} + +/* + * call-seq: + * proc { |...| block } => a_proc + * + * Equivalent to <code>Proc.new</code>. + */ + +VALUE +rb_block_proc() +{ + return proc_alloc(rb_cProc, Qfalse); +} + +VALUE +rb_f_lambda() +{ + rb_warn("rb_f_lambda() is deprecated; use rb_block_proc() instead"); + return proc_alloc(rb_cProc, Qtrue); +} + +/* + * call-seq: + * lambda { |...| block } => a_proc + * + * Equivalent to <code>Proc.new</code>, except the resulting Proc objects + * check the number of parameters passed when called. 
+ */ + +static VALUE +proc_lambda() +{ + return proc_alloc(rb_cProc, Qtrue); +} + +static int +block_orphan(data) + struct BLOCK *data; +{ + if (data->scope->flags & SCOPE_NOSTACK) { + return 1; + } + if (data->orig_thread != rb_thread_current()) { + return 1; + } + return 0; +} + +static VALUE +proc_invoke(proc, args, self, klass) + VALUE proc, args; /* OK */ + VALUE self, klass; +{ + struct BLOCK * volatile old_block; + struct BLOCK _block; + struct BLOCK *data; + volatile VALUE result = Qundef; + int state; + volatile int safe = ruby_safe_level; + volatile VALUE old_wrapper = ruby_wrapper; + volatile int pcall, avalue = Qtrue; + VALUE bvar = Qnil, tmp = args; + + Data_Get_Struct(proc, struct BLOCK, data); + pcall = (data->flags & BLOCK_LAMBDA) ? YIELD_LAMBDA_CALL : 0; + if (!pcall && RARRAY(args)->len == 1) { + avalue = Qfalse; + args = RARRAY(args)->ptr[0]; + } + if (rb_block_given_p() && ruby_frame->callee) { + if (klass != ruby_frame->this_class) + klass = rb_obj_class(proc); + bvar = rb_block_proc(); + } + + PUSH_VARS(); + ruby_wrapper = data->wrapper; + ruby_dyna_vars = data->dyna_vars; + /* PUSH BLOCK from data */ + old_block = ruby_block; + _block = *data; + _block.block_obj = bvar; + if (self != Qundef) _block.frame.self = self; + if (klass) _block.frame.this_class = klass; + _block.frame.argc = RARRAY(tmp)->len; + if (_block.frame.argc && (ruby_frame->flags & FRAME_DMETH)) { + NEWOBJ(scope, struct SCOPE); + OBJSETUP(scope, tmp, T_SCOPE); + scope->local_tbl = _block.scope->local_tbl; + scope->local_vars = _block.scope->local_vars; + _block.scope = scope; + } + ruby_block = &_block; + + PUSH_ITER(ITER_CUR); + ruby_frame->iter = ITER_CUR; + PUSH_TAG((pcall&YIELD_LAMBDA_CALL) ? 
PROT_LAMBDA : PROT_NONE); + state = EXEC_TAG(); + if (state == 0) { + proc_set_safe_level(proc); + result = rb_yield_0(args, self, (self!=Qundef)?CLASS_OF(self):0, + pcall | YIELD_PROC_CALL, avalue); + } + else if (TAG_DST()) { + result = prot_tag->retval; + } + POP_TAG(); + POP_ITER(); + ruby_block = old_block; + ruby_wrapper = old_wrapper; + POP_VARS(); + if (proc_safe_level_p(proc)) ruby_safe_level = safe; + + switch (state) { + case 0: + break; + case TAG_RETRY: + proc_jump_error(TAG_RETRY, Qnil); /* xxx */ + JUMP_TAG(state); + break; + case TAG_BREAK: + if (!pcall && result != Qundef) { + proc_jump_error(state, result); + } + case TAG_RETURN: + if (result != Qundef) { + if (pcall) break; + return_jump(result); + } + default: + JUMP_TAG(state); + } + return result; +} + +/* CHECKME: are the argument checking semantics correct? */ + +/* + * call-seq: + * prc.call(params,...) => obj + * prc[params,...] => obj + * + * Invokes the block, setting the block's parameters to the values in + * <i>params</i> using something close to method calling semantics. + * Generates a warning if multiple values are passed to a proc that + * expects just one (previously this silently converted the parameters + * to an array). + * + * For procs created using <code>Kernel.proc</code>, generates an + * error if the wrong number of parameters + * are passed to a proc with multiple parameters. For procs created using + * <code>Proc.new</code>, extra parameters are silently discarded. + * + * Returns the value of the last expression evaluated in the block. See + * also <code>Proc#yield</code>. 
+ * + * a_proc = Proc.new {|a, *b| b.collect {|i| i*a }} + * a_proc.call(9, 1, 2, 3) #=> [9, 18, 27] + * a_proc[9, 1, 2, 3] #=> [9, 18, 27] + * a_proc = Proc.new {|a,b| a} + * a_proc.call(1,2,3) + * + * <em>produces:</em> + * + * prog.rb:5: wrong number of arguments (3 for 2) (ArgumentError) + * from prog.rb:4:in `call' + * from prog.rb:5 + */ + +static VALUE +proc_call(proc, args) + VALUE proc, args; /* OK */ +{ + return proc_invoke(proc, args, Qundef, 0); +} + +int +rb_proc_arity(proc) + VALUE proc; +{ + struct BLOCK *data; + NODE *var, *list; + int n; + + Data_Get_Struct(proc, struct BLOCK, data); + var = data->var; + if (var == 0) { + if (data->body && nd_type(data->body) == NODE_IFUNC && + data->body->nd_cfnc == bmcall) { + return method_arity(data->body->nd_tval); + } + return 0; + } + if (var == (NODE*)1) return 0; + if (var == (NODE*)2) return 0; + if (nd_type(var) == NODE_BLOCK_ARG) { + var = var->nd_args; + if (var == (NODE*)1) return 0; + if (var == (NODE*)2) return 0; + } + switch (nd_type(var)) { + default: + return 1; + case NODE_MASGN: + list = var->nd_head; + n = 0; + while (list) { + n++; + list = list->nd_next; + } + if (var->nd_args) return -n-1; + return n; + } +} + +/* + * call-seq: + * prc.arity -> fixnum + * + * Returns the number of arguments that would not be ignored. If the block + * is declared to take no arguments, returns 0. If the block is known + * to take exactly n arguments, returns n. If the block has optional + * arguments, return -n-1, where n is the number of mandatory + * arguments. A <code>proc</code> with no argument declarations + * is the same a block declaring <code>||</code> as its arguments. 
+ * + * Proc.new {}.arity #=> 0 + * Proc.new {||}.arity #=> 0 + * Proc.new {|a|}.arity #=> 1 + * Proc.new {|a,b|}.arity #=> 2 + * Proc.new {|a,b,c|}.arity #=> 3 + * Proc.new {|*a|}.arity #=> -1 + * Proc.new {|a,*b|}.arity #=> -2 + */ + +static VALUE +proc_arity(proc) + VALUE proc; +{ + int arity = rb_proc_arity(proc); + return INT2FIX(arity); +} + +/* + * call-seq: + * prc == other_proc => true or false + * + * Return <code>true</code> if <i>prc</i> is the same object as + * <i>other_proc</i>, or if they are both procs with the same body. + */ + +static VALUE +proc_eq(self, other) + VALUE self, other; +{ + struct BLOCK *data, *data2; + + if (self == other) return Qtrue; + if (TYPE(other) != T_DATA) return Qfalse; + if (RDATA(other)->dmark != (RUBY_DATA_FUNC)blk_mark) return Qfalse; + if (CLASS_OF(self) != CLASS_OF(other)) return Qfalse; + Data_Get_Struct(self, struct BLOCK, data); + Data_Get_Struct(other, struct BLOCK, data2); + if (data->body != data2->body) return Qfalse; + if (data->var != data2->var) return Qfalse; + if (data->scope != data2->scope) return Qfalse; + if (data->dyna_vars != data2->dyna_vars) return Qfalse; + if (data->flags != data2->flags) return Qfalse; + + return Qtrue; +} + +/* + * call-seq: + * prc.hash => integer + * + * Return hash value corresponding to proc body. + */ + +static VALUE +proc_hash(self) + VALUE self; +{ + struct BLOCK *data; + long hash; + + Data_Get_Struct(self, struct BLOCK, data); + hash = (long)data->body; + hash ^= (long)data->var; + hash ^= data->frame.uniq << 16; + hash ^= data->flags; + + return INT2FIX(hash); +} + +/* + * call-seq: + * prc.to_s => string + * + * Shows the unique identifier for this proc, along with + * an indication of where the proc was defined. 
+ */ + +static VALUE +proc_to_s(self) + VALUE self; +{ + struct BLOCK *data; + NODE *node; + char *cname = rb_obj_classname(self); + const int w = (SIZEOF_LONG * CHAR_BIT) / 4; + long len = strlen(cname)+6+w; /* 6:tags 16:addr */ + VALUE str; + + Data_Get_Struct(self, struct BLOCK, data); + if ((node = data->frame.node) || (node = data->body)) { + len += strlen(node->nd_file) + 2 + (SIZEOF_LONG*CHAR_BIT-NODE_LSHIFT)/3; + str = rb_str_new(0, len); + sprintf(RSTRING(str)->ptr, "#<%s:0x%.*lx@%s:%d>", cname, w, (VALUE)data->body, + node->nd_file, nd_line(node)); + } + else { + str = rb_str_new(0, len); + sprintf(RSTRING(str)->ptr, "#<%s:0x%.*lx>", cname, w, (VALUE)data->body); + } + RSTRING(str)->len = strlen(RSTRING(str)->ptr); + if (OBJ_TAINTED(self)) OBJ_TAINT(str); + + return str; +} + +/* + * call-seq: + * prc.to_proc -> prc + * + * Part of the protocol for converting objects to <code>Proc</code> + * objects. Instances of class <code>Proc</code> simply return + * themselves. + */ + +static VALUE +proc_to_self(self) + VALUE self; +{ + return self; +} + +/* + * call-seq: + * prc.binding => binding + * + * Returns the binding associated with <i>prc</i>. Note that + * <code>Kernel#eval</code> accepts either a <code>Proc</code> or a + * <code>Binding</code> object as its second parameter. 
+ * + * def fred(param) + * proc {} + * end + * + * b = fred(99) + * eval("param", b.binding) #=> 99 + * eval("param", b) #=> 99 + */ + +static VALUE +proc_binding(proc) + VALUE proc; +{ + struct BLOCK *orig, *data; + VALUE bind; + + Data_Get_Struct(proc, struct BLOCK, orig); + bind = Data_Make_Struct(rb_cBinding,struct BLOCK,blk_mark,blk_free,data); + MEMCPY(data, orig, struct BLOCK, 1); + frame_dup(&data->frame); + + if (data->iter) { + blk_copy_prev(data); + } + else { + data->prev = 0; + } + + return bind; +} + +static VALUE +rb_block_pass(func, arg, proc) + VALUE (*func) _((VALUE)); + VALUE arg; + VALUE proc; +{ + VALUE b; + struct BLOCK * volatile old_block; + struct BLOCK _block; + struct BLOCK *data; + volatile VALUE result = Qnil; + int state; + volatile int orphan; + volatile int safe = ruby_safe_level; + + if (NIL_P(proc)) { + PUSH_ITER(ITER_NOT); + result = (*func)(arg); + POP_ITER(); + return result; + } + if (!rb_obj_is_proc(proc)) { + b = rb_check_convert_type(proc, T_DATA, "Proc", "to_proc"); + if (!rb_obj_is_proc(b)) { + rb_raise(rb_eTypeError, "wrong argument type %s (expected Proc)", + rb_obj_classname(proc)); + } + proc = b; + } + + if (ruby_safe_level >= 1 && OBJ_TAINTED(proc)) { + if (ruby_safe_level > proc_get_safe_level(proc)) { + rb_raise(rb_eSecurityError, "Insecure: tainted block value"); + } + } + + if (ruby_block && ruby_block->block_obj == proc) { + PUSH_ITER(ITER_PRE); + result = (*func)(arg); + POP_ITER(); + return result; + } + + Data_Get_Struct(proc, struct BLOCK, data); + orphan = block_orphan(data); + + /* PUSH BLOCK from data */ + _block = *data; + _block.outer = ruby_block; + if (orphan) _block.uniq = block_unique++; + ruby_block = &_block; + PUSH_ITER(ITER_PRE); + if (ruby_frame->iter == ITER_NOT) + ruby_frame->iter = ITER_PRE; + + PUSH_TAG(PROT_LOOP); + state = EXEC_TAG(); + if (state == 0) { + retry: + proc_set_safe_level(proc); + if (safe > ruby_safe_level) + ruby_safe_level = safe; + result = (*func)(arg); + } + else if 
(state == TAG_BREAK && TAG_DST()) { + result = prot_tag->retval; + state = 0; + } + else if (state == TAG_RETRY) { + state = 0; + goto retry; + } + POP_TAG(); + POP_ITER(); + ruby_block = _block.outer; + if (proc_safe_level_p(proc)) ruby_safe_level = safe; + + switch (state) {/* escape from orphan block */ + case 0: + break; + case TAG_RETURN: + if (orphan) { + proc_jump_error(state, prot_tag->retval); + } + default: + JUMP_TAG(state); + } + + return result; +} + +struct block_arg { + VALUE self; + NODE *iter; +}; + +static VALUE +call_block(arg) + struct block_arg *arg; +{ + return rb_eval(arg->self, arg->iter); +} + +static VALUE +block_pass(self, node) + VALUE self; + NODE *node; +{ + struct block_arg arg; + arg.self = self; + arg.iter = node->nd_iter; + return rb_block_pass((VALUE (*)_((VALUE)))call_block, + (VALUE)&arg, rb_eval(self, node->nd_body)); +} + +struct METHOD { + VALUE klass, rklass; + VALUE recv; + ID id, oid; + NODE *body; +}; + +static void +bm_mark(data) + struct METHOD *data; +{ + rb_gc_mark(data->rklass); + rb_gc_mark(data->klass); + rb_gc_mark(data->recv); + rb_gc_mark((VALUE)data->body); +} + +static VALUE +mnew(klass, obj, id, mklass) + VALUE klass, obj, mklass; + ID id; +{ + VALUE method; + NODE *body; + int noex; + struct METHOD *data; + VALUE rklass = klass; + ID oid = id; + + again: + if ((body = rb_get_method_body(&klass, &id, &noex)) == 0) { + print_undef(rklass, oid); + } + + if (nd_type(body) == NODE_ZSUPER) { + klass = RCLASS(klass)->super; + goto again; + } + + while (rklass != klass && + (FL_TEST(rklass, FL_SINGLETON) || TYPE(rklass) == T_ICLASS)) { + rklass = RCLASS(rklass)->super; + } + if (TYPE(klass) == T_ICLASS) klass = RBASIC(klass)->klass; + method = Data_Make_Struct(mklass, struct METHOD, bm_mark, -1, data); + data->klass = klass; + data->recv = obj; + data->id = id; + data->body = body; + data->rklass = rklass; + data->oid = oid; + OBJ_INFECT(method, klass); + + return method; +} + + 
+/********************************************************************** + * + * Document-class : Method + * + * Method objects are created by <code>Object#method</code>, and are + * associated with a particular object (not just with a class). They + * may be used to invoke the method within the object, and as a block + * associated with an iterator. They may also be unbound from one + * object (creating an <code>UnboundMethod</code>) and bound to + * another. + * + * class Thing + * def square(n) + * n*n + * end + * end + * thing = Thing.new + * meth = thing.method(:square) + * + * meth.call(9) #=> 81 + * [ 1, 2, 3 ].collect(&meth) #=> [1, 4, 9] + * + */ + +/* + * call-seq: + * meth == other_meth => true or false + * + * Two method objects are equal if that are bound to the same + * object and contain the same body. + */ + + +static VALUE +method_eq(method, other) + VALUE method, other; +{ + struct METHOD *m1, *m2; + + if (TYPE(other) != T_DATA || RDATA(other)->dmark != (RUBY_DATA_FUNC)bm_mark) + return Qfalse; + if (CLASS_OF(method) != CLASS_OF(other)) + return Qfalse; + + Data_Get_Struct(method, struct METHOD, m1); + Data_Get_Struct(other, struct METHOD, m2); + + if (m1->klass != m2->klass || m1->rklass != m2->rklass || + m1->recv != m2->recv || m1->body != m2->body) + return Qfalse; + + return Qtrue; +} + +/* + * call-seq: + * meth.hash => integer + * + * Return a hash value corresponding to the method object. + */ + +static VALUE +method_hash(method) + VALUE method; +{ + struct METHOD *m; + long hash; + + Data_Get_Struct(method, struct METHOD, m); + hash = (long)m->klass; + hash ^= (long)m->rklass; + hash ^= (long)m->recv; + hash ^= (long)m->body; + + return INT2FIX(hash); +} + +/* + * call-seq: + * meth.unbind => unbound_method + * + * Dissociates <i>meth</i> from it's current receiver. The resulting + * <code>UnboundMethod</code> can subsequently be bound to a new object + * of the same class (see <code>UnboundMethod</code>). 
+ */ + +static VALUE +method_unbind(obj) + VALUE obj; +{ + VALUE method; + struct METHOD *orig, *data; + + Data_Get_Struct(obj, struct METHOD, orig); + method = Data_Make_Struct(rb_cUnboundMethod, struct METHOD, bm_mark, free, data); + data->klass = orig->klass; + data->recv = Qundef; + data->id = orig->id; + data->body = orig->body; + data->rklass = orig->rklass; + data->oid = orig->oid; + OBJ_INFECT(method, obj); + + return method; +} + +/* + * call-seq: + * obj.method(sym) => method + * + * Looks up the named method as a receiver in <i>obj</i>, returning a + * <code>Method</code> object (or raising <code>NameError</code>). The + * <code>Method</code> object acts as a closure in <i>obj</i>'s object + * instance, so instance variables and the value of <code>self</code> + * remain available. + * + * class Demo + * def initialize(n) + * @iv = n + * end + * def hello() + * "Hello, @iv = #{@iv}" + * end + * end + * + * k = Demo.new(99) + * m = k.method(:hello) + * m.call #=> "Hello, @iv = 99" + * + * l = Demo.new('Fred') + * m = l.method("hello") + * m.call #=> "Hello, @iv = Fred" + */ + +static VALUE +rb_obj_method(obj, vid) + VALUE obj; + VALUE vid; +{ + return mnew(CLASS_OF(obj), obj, rb_to_id(vid), rb_cMethod); +} + +/* + * call-seq: + * mod.instance_method(symbol) => unbound_method + * + * Returns an +UnboundMethod+ representing the given + * instance method in _mod_. + * + * class Interpreter + * def do_a() print "there, "; end + * def do_d() print "Hello "; end + * def do_e() print "!\n"; end + * def do_v() print "Dave"; end + * Dispatcher = { + * ?a => instance_method(:do_a), + * ?d => instance_method(:do_d), + * ?e => instance_method(:do_e), + * ?v => instance_method(:do_v) + * } + * def interpret(string) + * string.each_byte {|b| Dispatcher[b].bind(self).call } + * end + * end + * + * + * interpreter = Interpreter.new + * interpreter.interpret('dave') + * + * <em>produces:</em> + * + * Hello there, Dave! 
+ */ + +static VALUE +rb_mod_method(mod, vid) + VALUE mod; + VALUE vid; +{ + return mnew(mod, Qundef, rb_to_id(vid), rb_cUnboundMethod); +} + +/* + * MISSING: documentation + */ + +static VALUE +method_clone(self) + VALUE self; +{ + VALUE clone; + struct METHOD *orig, *data; + + Data_Get_Struct(self, struct METHOD, orig); + clone = Data_Make_Struct(CLASS_OF(self),struct METHOD, bm_mark, free, data); + CLONESETUP(clone, self); + *data = *orig; + + return clone; +} + +/* + * call-seq: + * meth.call(args, ...) => obj + * meth[args, ...] => obj + * + * Invokes the <i>meth</i> with the specified arguments, returning the + * method's return value. + * + * m = 12.method("+") + * m.call(3) #=> 15 + * m.call(20) #=> 32 + */ + +static VALUE +method_call(argc, argv, method) + int argc; + VALUE *argv; + VALUE method; +{ + VALUE result = Qnil; /* OK */ + struct METHOD *data; + int state; + volatile int safe = -1; + + Data_Get_Struct(method, struct METHOD, data); + if (data->recv == Qundef) { + rb_raise(rb_eTypeError, "can't call unbound method; bind first"); + } + PUSH_ITER(rb_block_given_p()?ITER_PRE:ITER_NOT); + PUSH_TAG(PROT_NONE); + if (OBJ_TAINTED(method)) { + safe = ruby_safe_level; + if (ruby_safe_level < 4) ruby_safe_level = 4; + } + if ((state = EXEC_TAG()) == 0) { + result = rb_call0(data->klass,data->recv,data->id,data->oid,argc,argv,data->body,0); + } + POP_TAG(); + POP_ITER(); + if (safe >= 0) ruby_safe_level = safe; + if (state) JUMP_TAG(state); + return result; +} + +/********************************************************************** + * + * Document-class: UnboundMethod + * + * Ruby supports two forms of objectified methods. Class + * <code>Method</code> is used to represent methods that are associated + * with a particular object: these method objects are bound to that + * object. Bound method objects for an object can be created using + * <code>Object#method</code>. 
+ * + * Ruby also supports unbound methods; methods objects that are not + * associated with a particular object. These can be created either by + * calling <code>Module#instance_method</code> or by calling + * <code>unbind</code> on a bound method object. The result of both of + * these is an <code>UnboundMethod</code> object. + * + * Unbound methods can only be called after they are bound to an + * object. That object must be be a kind_of? the method's original + * class. + * + * class Square + * def area + * @side * @side + * end + * def initialize(side) + * @side = side + * end + * end + * + * area_un = Square.instance_method(:area) + * + * s = Square.new(12) + * area = area_un.bind(s) + * area.call #=> 144 + * + * Unbound methods are a reference to the method at the time it was + * objectified: subsequent changes to the underlying class will not + * affect the unbound method. + * + * class Test + * def test + * :original + * end + * end + * um = Test.instance_method(:test) + * class Test + * def test + * :modified + * end + * end + * t = Test.new + * t.test #=> :modified + * um.bind(t).call #=> :original + * + */ + +/* + * call-seq: + * umeth.bind(obj) -> method + * + * Bind <i>umeth</i> to <i>obj</i>. If <code>Klass</code> was the class + * from which <i>umeth</i> was obtained, + * <code>obj.kind_of?(Klass)</code> must be true. 
+ * + * class A + * def test + * puts "In test, class = #{self.class}" + * end + * end + * class B < A + * end + * class C < B + * end + * + * + * um = B.instance_method(:test) + * bm = um.bind(C.new) + * bm.call + * bm = um.bind(B.new) + * bm.call + * bm = um.bind(A.new) + * bm.call + * + * <em>produces:</em> + * + * In test, class = C + * In test, class = B + * prog.rb:16:in `bind': bind argument must be an instance of B (TypeError) + * from prog.rb:16 + */ + +static VALUE +umethod_bind(method, recv) + VALUE method, recv; +{ + struct METHOD *data, *bound; + + Data_Get_Struct(method, struct METHOD, data); + if (data->rklass != CLASS_OF(recv)) { + if (FL_TEST(data->rklass, FL_SINGLETON)) { + rb_raise(rb_eTypeError, "singleton method called for a different object"); + } + if(!rb_obj_is_kind_of(recv, data->rklass)) { + rb_raise(rb_eTypeError, "bind argument must be an instance of %s", + rb_class2name(data->rklass)); + } + } + + method = Data_Make_Struct(rb_cMethod,struct METHOD,bm_mark,free,bound); + *bound = *data; + bound->recv = recv; + bound->rklass = CLASS_OF(recv); + + return method; +} + +int +rb_node_arity(body) + NODE *body; +{ + int n; + + switch (nd_type(body)) { + case NODE_CFUNC: + if (body->nd_argc < 0) return -1; + return body->nd_argc; + case NODE_ZSUPER: + return -1; + case NODE_ATTRSET: + return 1; + case NODE_IVAR: + return 0; + case NODE_BMETHOD: + return rb_proc_arity(body->nd_cval); + case NODE_SCOPE: + body = body->nd_next; /* skip NODE_SCOPE */ + if (nd_type(body) == NODE_BLOCK) + body = body->nd_head; + if (!body) return 0; + n = body->nd_cnt; + if (body->nd_opt || body->nd_rest != -1) + n = -n-1; + return n; + default: + rb_raise(rb_eArgError, "invalid node 0x%x", nd_type(body)); + } +} + +/* + * call-seq: + * meth.arity => fixnum + * + * Returns an indication of the number of arguments accepted by a + * method. Returns a nonnegative integer for methods that take a fixed + * number of arguments. 
For Ruby methods that take a variable number of
+ *  arguments, returns -n-1, where n is the number of required
+ *  arguments. For methods written in C, returns -1 if the call takes a
+ *  variable number of arguments.
+ *
+ *     class C
+ *       def one;    end
+ *       def two(a); end
+ *       def three(*a);  end
+ *       def four(a, b); end
+ *       def five(a, b, *c);    end
+ *       def six(a, b, *c, &d); end
+ *     end
+ *     c = C.new
+ *     c.method(:one).arity     #=> 0
+ *     c.method(:two).arity     #=> 1
+ *     c.method(:three).arity   #=> -1
+ *     c.method(:four).arity    #=> 2
+ *     c.method(:five).arity    #=> -3
+ *     c.method(:six).arity     #=> -3
+ *
+ *     "cat".method(:size).arity      #=> 0
+ *     "cat".method(:replace).arity   #=> 1
+ *     "cat".method(:squeeze).arity   #=> -1
+ *     "cat".method(:count).arity     #=> -1
+ */
+
+static VALUE
+method_arity_m(method)
+    VALUE method;
+{
+    int n = method_arity(method);
+    return INT2FIX(n);   /* hand the C int back to Ruby as a Fixnum */
+}
+/*
+ *  C-level helper behind method_arity_m(): fetches the struct METHOD
+ *  wrapped by method and returns rb_node_arity() of its body node as a
+ *  plain int.
+ */
+static int
+method_arity(method)
+    VALUE method;
+{
+    struct METHOD *data;
+
+    Data_Get_Struct(method, struct METHOD, data);
+    return rb_node_arity(data->body);
+}
+/*
+ *  Returns rb_node_arity() of the node that rb_method_node() yields for
+ *  the method named id in mod.
+ *
+ *  NOTE(review): the node is passed on unchecked - confirm what
+ *  rb_method_node() returns when no such method exists.
+ */
+int
+rb_mod_method_arity(mod, id)
+    VALUE mod;
+    ID id;
+{
+    NODE *node = rb_method_node(mod, id);
+    return rb_node_arity(node);
+}
+/*
+ *  Same as rb_mod_method_arity(), with the lookup done in CLASS_OF(obj).
+ */
+int
+rb_obj_method_arity(obj, id)
+    VALUE obj;
+    ID id;
+{
+    return rb_mod_method_arity(CLASS_OF(obj), id);
+}
+
+/*
+ *  call-seq:
+ *   meth.to_s      =>  string
+ *   meth.inspect   =>  string
+ *
+ *  Show the name of the underlying method.
+ *
+ *    "cat".method(:count).inspect   #=> "#<Method: String#count>"
+ */
+
+static VALUE
+method_inspect(method)
+    VALUE method;
+{
+    struct METHOD *data;
+    VALUE str;
+    const char *s;
+    char *sharp = "#";   /* separator written between the owner and the method name */
+
+    Data_Get_Struct(method, struct METHOD, data);
+    str = rb_str_buf_new2("#<");
+    s = rb_obj_classname(method);   /* class name of the method object itself */
+    rb_str_buf_cat2(str, s);
+    rb_str_buf_cat2(str, ": ");
+
+    if (FL_TEST(data->klass, FL_SINGLETON)) {
+        VALUE v = rb_iv_get(data->klass, "__attached__");   /* object stored on the singleton class */
+
+        if (data->recv == Qundef) {         /* recv is Qundef: show the singleton class, keep the # separator */
+            rb_str_buf_append(str, rb_inspect(data->klass));
+        }
+        else if (data->recv == v) {         /* receiver is the attached object itself */
+            rb_str_buf_append(str, rb_inspect(v));
+            sharp = ".";
+        }
+        else {                              /* other receiver: show it, attached object in parentheses */
+            rb_str_buf_append(str, rb_inspect(data->recv));
+            rb_str_buf_cat2(str, "(");
+            rb_str_buf_append(str, rb_inspect(v));
+            rb_str_buf_cat2(str, ")");
+            sharp = ".";
+        }
+    }
+    else {                                  /* not a singleton class: rklass, plus klass when they differ */
+        rb_str_buf_cat2(str, rb_class2name(data->rklass));
+        if (data->rklass != data->klass) {
+            rb_str_buf_cat2(str, "(");
+            rb_str_buf_cat2(str, rb_class2name(data->klass));
+            rb_str_buf_cat2(str, ")");
+        }
+    }
+    rb_str_buf_cat2(str, sharp);
+    rb_str_buf_cat2(str, rb_id2name(data->oid));
+    rb_str_buf_cat2(str, ">");
+
+    return str;
+}
+/*
+ *  Iteration callback handed to rb_iterate() by rb_proc_new() and
+ *  method_proc(). Pushes an ITER_CUR iterator state and a frame, takes
+ *  rb_block_proc() inside them, pops both again and returns the proc.
+ *  The method argument is not used.
+ */
+static VALUE
+mproc(method)
+    VALUE method;
+{
+    VALUE proc;
+
+    /* emulate ruby's method call */
+    PUSH_ITER(ITER_CUR);
+    PUSH_FRAME();
+    proc = rb_block_proc();
+    POP_FRAME();
+    POP_ITER();
+
+    return proc;
+}
+/*
+ *  Block function handed to rb_iterate() by method_proc(): converts
+ *  args with svalue_to_avalue() and passes the length and element
+ *  pointer of the resulting array, together with method, to
+ *  method_call().
+ */
+static VALUE
+bmcall(args, method)
+    VALUE args, method;
+{
+    volatile VALUE a;   /* NOTE(review): volatile presumably keeps the array reachable for the GC - confirm */
+
+    a = svalue_to_avalue(args);
+    return method_call(RARRAY(a)->len, RARRAY(a)->ptr, method);
+}
+/*
+ *  Builds a proc by running rb_iterate() with mproc as the iteration
+ *  function and func as the block function, val being passed along as
+ *  the data argument for func. The body node of the resulting block is
+ *  then marked YIELD_FUNC_AVALUE before the proc is returned.
+ */
+VALUE
+rb_proc_new(func, val)
+    VALUE (*func)(ANYARGS);     /* VALUE yieldarg[, VALUE procarg] */
+    VALUE val;
+{
+    struct BLOCK *data;
+    VALUE proc = rb_iterate((VALUE(*)_((VALUE)))mproc, 0, func, val);
+
+    Data_Get_Struct(proc, struct BLOCK, data);
+    data->body->nd_state = YIELD_FUNC_AVALUE;
+    return proc;
+}
+
+/*
+ *  call-seq:
+ *     meth.to_proc    => prc
+ *
+ *  Returns a <code>Proc</code> object corresponding to this method.
+ */
+
+static VALUE
+method_proc(method)
+    VALUE method;
+{
+    VALUE proc;
+    struct METHOD *mdata;
+    struct BLOCK *bdata;
+
+    Data_Get_Struct(method, struct METHOD, mdata);
+    if (nd_type(mdata->body) == NODE_BMETHOD) {
+        return mdata->body->nd_cval;   /* the node value is returned directly, no new proc is built */
+    }
+    proc = rb_iterate((VALUE(*)_((VALUE)))mproc, 0, bmcall, method);   /* block body is bmcall, with method as its data */
+    Data_Get_Struct(proc, struct BLOCK, bdata);
+    bdata->body->nd_file = mdata->body->nd_file;       /* copy file and line from the method body node */
+    nd_set_line(bdata->body, nd_line(mdata->body));
+    bdata->body->nd_state = YIELD_FUNC_SVALUE;
+    bdata->flags |= BLOCK_FROM_METHOD;
+
+    return proc;
+}
+/*
+ *  Returns Qtrue when m is a T_DATA object whose mark function is
+ *  bm_mark, and Qfalse otherwise. bm_mark is the mark function that
+ *  umethod_bind() installs on the Method objects it creates.
+ */
+static VALUE
+rb_obj_is_method(m)
+    VALUE m;
+{
+    if (TYPE(m) == T_DATA && RDATA(m)->dmark == (RUBY_DATA_FUNC)bm_mark) {
+        return Qtrue;
+    }
+    return Qfalse;
+}
+
+/*
+ *  call-seq:
+ *     define_method(symbol, method)     => new_method
+ *     define_method(symbol) { block }   => proc
+ *
+ *  Defines an instance method in the receiver. The _method_
+ *  parameter can be a +Proc+ or +Method+ object.
+ *  If a block is specified, it is used as the method body. This block
+ *  is evaluated using <code>instance_eval</code>, a point that is
+ *  tricky to demonstrate because <code>define_method</code> is private.
+ *  (This is why we resort to the +send+ hack in this example.)
+ *
+ *     class A
+ *       def fred
+ *         puts "In Fred"
+ *       end
+ *       def create_method(name, &block)
+ *         self.class.send(:define_method, name, &block)
+ *       end
+ *       define_method(:wilma) { puts "Charge it!" }
+ *     end
+ *     class B < A
+ *       define_method(:barney, instance_method(:fred))
+ *     end
+ *     a = B.new
+ *     a.barney
+ *     a.wilma
+ *     a.create_method(:betty) { p self }
+ *     a.betty
+ *
+ *  <em>produces:</em>
+ *
+ *     In Fred
+ *     Charge it!
+ *     #<B:0x401b39e8>
+ */
+
+static VALUE
+rb_mod_define_method(argc, argv, mod)
+    int argc;
+    VALUE *argv;
+    VALUE mod;
+{
+    ID id;
+    VALUE body;
+    NODE *node;
+    int noex;
+
+    if (argc == 1) {                    /* name only: the body comes from proc_lambda() */
+        id = rb_to_id(argv[0]);
+        body = proc_lambda();
+    }
+    else if (argc == 2) {               /* name plus an explicit Method or Proc */
+        id = rb_to_id(argv[0]);
+        body = argv[1];
+        if (!rb_obj_is_method(body) && !rb_obj_is_proc(body)) {
+            rb_raise(rb_eTypeError, "wrong argument type %s (expected Proc/Method)",
+                     rb_obj_classname(body));
+        }
+    }
+    else {
+        /* NOTE(review): the message says 1 although two arguments are accepted as well */
+        rb_raise(rb_eArgError, "wrong number of arguments (%d for 1)", argc);
+    }
+    if (RDATA(body)->dmark == (RUBY_DATA_FUNC)bm_mark) {        /* marked by bm_mark: a method object */
+        struct METHOD *method = (struct METHOD *)DATA_PTR(body);
+        VALUE rklass = method->rklass;
+        if (rklass != mod) {
+            /* a method from another class is accepted only when that class is
+               not a singleton class and mod inherits from it */
+            if (FL_TEST(rklass, FL_SINGLETON)) {
+                rb_raise(rb_eTypeError, "can't bind singleton method to a different class");
+            }
+            if (!RTEST(rb_class_inherited_p(mod, rklass))) {
+                rb_raise(rb_eTypeError, "bind argument must be a subclass of %s",
+                         rb_class2name(rklass));
+            }
+        }
+        node = method->body;            /* reuse the existing method body node */
+    }
+    else if (RDATA(body)->dmark == (RUBY_DATA_FUNC)blk_mark) {  /* marked by blk_mark: presumably a Proc - confirm */
+        struct BLOCK *block;
+
+        body = proc_clone(body);        /* the clone, not the argument, is modified and returned */
+        RBASIC(body)->flags |= PROC_NOSAFE;
+        Data_Get_Struct(body, struct BLOCK, block);
+        block->frame.callee = id;       /* make the block frame name the new method and its class */
+        block->frame.this_func = id;
+        block->frame.this_class = mod;
+        node = NEW_BMETHOD(body);
+    }
+    else {
+        /* type error */
+        rb_raise(rb_eTypeError, "wrong argument type (expected Proc/Method)");
+    }
+    /* choose the NOEX_* flag matching the SCOPE_* test that succeeds; public otherwise */
+    if (SCOPE_TEST(SCOPE_PRIVATE)) {
+        noex = NOEX_PRIVATE;
+    }
+    else if (SCOPE_TEST(SCOPE_PROTECTED)) {
+        noex = NOEX_PROTECTED;
+    }
+    else {
+        noex = NOEX_PUBLIC;
+    }
+    rb_add_method(mod, id, node, noex);
+    return body;
+}
+
+/*
+ *  <code>Proc</code> objects are blocks of code that have been bound to
+ *  a set of local variables. Once bound, the code may be called in
+ *  different contexts and still access those variables.
+ *
+ *     def gen_times(factor)
+ *       return Proc.new {|n| n*factor }
+ *     end
+ *
+ *     times3 = gen_times(3)
+ *     times5 = gen_times(5)
+ *
+ *     times3.call(12)               #=> 36
+ *     times5.call(5)                #=> 25
+ *     times3.call(times5.call(4))   #=> 60
+ *
+ */
+
+void
+Init_Proc()
+{
+    rb_eLocalJumpError = rb_define_class("LocalJumpError", rb_eStandardError);
+    rb_define_method(rb_eLocalJumpError, "exit_value", localjump_xvalue, 0);
+    rb_define_method(rb_eLocalJumpError, "reason", localjump_reason, 0);
+    /* exception object created once here and registered with rb_global_variable() */
+    exception_error = rb_exc_new2(rb_eFatal, "exception reentered");
+    rb_global_variable(&exception_error);
+    /* SystemStackError, with one tainted instance created up front */
+    rb_eSysStackError = rb_define_class("SystemStackError", rb_eException);
+    sysstack_error = rb_exc_new2(rb_eSysStackError, "stack level too deep");
+    OBJ_TAINT(sysstack_error);
+    rb_global_variable(&sysstack_error);
+    /* Proc: allocator undefined, Proc.new is served by proc_s_new */
+    rb_cProc = rb_define_class("Proc", rb_cObject);
+    rb_undef_alloc_func(rb_cProc);
+    rb_define_singleton_method(rb_cProc, "new", proc_s_new, -1);
+    /* last argument is the argument count of the C function; see README.EXT for the meaning of -1 and -2 */
+    rb_define_method(rb_cProc, "clone", proc_clone, 0);
+    rb_define_method(rb_cProc, "dup", proc_dup, 0);
+    rb_define_method(rb_cProc, "call", proc_call, -2);
+    rb_define_method(rb_cProc, "arity", proc_arity, 0);
+    rb_define_method(rb_cProc, "[]", proc_call, -2);        /* same function as call */
+    rb_define_method(rb_cProc, "==", proc_eq, 1);
+    rb_define_method(rb_cProc, "eql?", proc_eq, 1);
+    rb_define_method(rb_cProc, "hash", proc_hash, 0);
+    rb_define_method(rb_cProc, "to_s", proc_to_s, 0);
+    rb_define_method(rb_cProc, "to_proc", proc_to_self, 0);
+    rb_define_method(rb_cProc, "binding", proc_binding, 0);
+    /* global functions proc and lambda */
+    rb_define_global_function("proc", rb_block_proc, 0);
+    rb_define_global_function("lambda", proc_lambda, 0);
+    /* Method: allocator and new are both removed; Kernel#method (below) is served by rb_obj_method */
+    rb_cMethod = rb_define_class("Method", rb_cObject);
+    rb_undef_alloc_func(rb_cMethod);
+    rb_undef_method(CLASS_OF(rb_cMethod), "new");
+    rb_define_method(rb_cMethod, "==", method_eq, 1);
+    rb_define_method(rb_cMethod, "eql?", method_eq, 1);
+    rb_define_method(rb_cMethod, "hash", method_hash, 0);
+    rb_define_method(rb_cMethod, "clone", method_clone, 0);
+    rb_define_method(rb_cMethod, "call", method_call, -1);
+    rb_define_method(rb_cMethod, "[]", method_call, -1);    /* same function as call */
+    rb_define_method(rb_cMethod, "arity", method_arity_m, 0);
+    rb_define_method(rb_cMethod, "inspect", method_inspect, 0);
+    rb_define_method(rb_cMethod, "to_s", method_inspect, 0);
+    rb_define_method(rb_cMethod, "to_proc", method_proc, 0);
+    rb_define_method(rb_cMethod, "unbind", method_unbind, 0);
+    rb_define_method(rb_mKernel, "method", rb_obj_method, 1);
+    /* UnboundMethod: shares the Method functions for ==, eql?, hash, clone, arity, inspect and to_s; adds bind */
+    rb_cUnboundMethod = rb_define_class("UnboundMethod", rb_cObject);
+    rb_undef_alloc_func(rb_cUnboundMethod);
+    rb_undef_method(CLASS_OF(rb_cUnboundMethod), "new");
+    rb_define_method(rb_cUnboundMethod, "==", method_eq, 1);
+    rb_define_method(rb_cUnboundMethod, "eql?", method_eq, 1);
+    rb_define_method(rb_cUnboundMethod, "hash", method_hash, 0);
+    rb_define_method(rb_cUnboundMethod, "clone", method_clone, 0);
+    rb_define_method(rb_cUnboundMethod, "arity", method_arity_m, 0);
+    rb_define_method(rb_cUnboundMethod, "inspect", method_inspect, 0);
+    rb_define_method(rb_cUnboundMethod, "to_s", method_inspect, 0);
+    rb_define_method(rb_cUnboundMethod, "bind", umethod_bind, 1);
+    rb_define_method(rb_cModule, "instance_method", rb_mod_method, 1);
+}
+
+/*
+ *  Objects of class <code>Binding</code> encapsulate the execution
+ *  context at some particular place in the code and retain this context
+ *  for future use. The variables, methods, value of <code>self</code>,
+ *  and possibly an iterator block that can be accessed in this context
+ *  are all retained. Binding objects can be created using
+ *  <code>Kernel#binding</code>, and are made available to the callback
+ *  of <code>Kernel#set_trace_func</code>.
+ *
+ *  These binding objects can be passed as the second argument of the
+ *  <code>Kernel#eval</code> method, establishing an environment for the
+ *  evaluation.
+ * + * class Demo + * def initialize(n) + * @secret = n + * end + * def getBinding + * return binding() + * end + * end + * + * k1 = Demo.new(99) + * b1 = k1.getBinding + * k2 = Demo.new(-3) + * b2 = k2.getBinding + * + * eval("@secret", b1) #=> 99 + * eval("@secret", b2) #=> -3 + * eval("@secret") #=> nil + * + * Binding objects have no class-specific methods. + * + */ + +void +Init_Binding() +{ + rb_cBinding = rb_define_class("Binding", rb_cObject); + rb_undef_alloc_func(rb_cBinding); + rb_undef_method(CLASS_OF(rb_cBinding), "new"); + rb_define_method(rb_cBinding, "clone", proc_clone, 0); + rb_define_method(rb_cBinding, "eval", bind_eval, -1); + rb_define_global_function("binding", rb_f_binding, 0); +} + +#ifdef __ia64__ +#if defined(__FreeBSD__) +/* + * FreeBSD/ia64 currently does not have a way for a process to get the + * base address for the RSE backing store, so hardcode it. + */ +#define __libc_ia64_register_backing_store_base (4ULL<<61) +#else +#ifdef HAVE_UNWIND_H +#include <unwind.h> +#else +#pragma weak __libc_ia64_register_backing_store_base +extern unsigned long __libc_ia64_register_backing_store_base; +#endif +#endif +#endif + +/* Windows SEH refers data on the stack. */ +#undef SAVE_WIN32_EXCEPTION_LIST +#if defined _WIN32 || defined __CYGWIN__ +#if defined __CYGWIN__ +typedef unsigned long DWORD; +#endif + +static inline DWORD +win32_get_exception_list() +{ + DWORD p; +# if defined _MSC_VER +# ifdef _M_IX86 +# define SAVE_WIN32_EXCEPTION_LIST +# if _MSC_VER >= 1310 + /* warning: unsafe assignment to fs:0 ... 
this is ok */ +# pragma warning(disable: 4733) +# endif + __asm mov eax, fs:[0]; + __asm mov p, eax; +# endif +# elif defined __GNUC__ +# ifdef __i386__ +# define SAVE_WIN32_EXCEPTION_LIST + __asm__("movl %%fs:0,%0" : "=r"(p)); +# endif +# elif defined __BORLANDC__ +# define SAVE_WIN32_EXCEPTION_LIST + __emit__(0x64, 0xA1, 0, 0, 0, 0); /* mov eax, fs:[0] */ + p = _EAX; +# endif + return p; +} + +static inline void +win32_set_exception_list(p) + DWORD p; +{ +# if defined _MSC_VER +# ifdef _M_IX86 + __asm mov eax, p; + __asm mov fs:[0], eax; +# endif +# elif defined __GNUC__ +# ifdef __i386__ + __asm__("movl %0,%%fs:0" :: "r"(p)); +# endif +# elif defined __BORLANDC__ + _EAX = p; + __emit__(0x64, 0xA3, 0, 0, 0, 0); /* mov fs:[0], eax */ +# endif +} + +#if !defined SAVE_WIN32_EXCEPTION_LIST && !defined _WIN32_WCE +# error unsupported platform +#endif +#endif + +int rb_thread_pending = 0; + +VALUE rb_cThread; + +extern VALUE rb_last_status; + +enum thread_status { + THREAD_TO_KILL, + THREAD_RUNNABLE, + THREAD_STOPPED, + THREAD_KILLED, +}; + +#define WAIT_FD (1<<0) +#define WAIT_SELECT (1<<1) +#define WAIT_TIME (1<<2) +#define WAIT_JOIN (1<<3) +#define WAIT_PID (1<<4) + +/* +infty, for this purpose */ +#define DELAY_INFTY 1E30 + +#if !defined HAVE_PAUSE +# if defined _WIN32 && !defined __CYGWIN__ +# define pause() Sleep(INFINITE) +# else +# define pause() sleep(0x7fffffff) +# endif +#endif + +/* typedef struct thread * rb_thread_t; */ + +struct thread { + struct thread *next, *prev; + rb_jmpbuf_t context; +#ifdef SAVE_WIN32_EXCEPTION_LIST + DWORD win32_exception_list; +#endif + + VALUE result; + + long stk_len; + long stk_max; + VALUE *stk_ptr; + VALUE *stk_pos; +#ifdef __ia64__ + VALUE *bstr_ptr; + long bstr_len; +#endif + + struct FRAME *frame; + struct SCOPE *scope; + struct RVarmap *dyna_vars; + struct BLOCK *block; + struct iter *iter; + struct tag *tag; + VALUE klass; + VALUE wrapper; + NODE *cref; + struct ruby_env *anchor; + + int flags; /* misc. 
states (vmode/rb_trap_immediate/raised) */ + + NODE *node; + + int tracing; + VALUE errinfo; + VALUE last_status; + VALUE last_line; + VALUE last_match; + + int safe; + + enum thread_status status; + int wait_for; + int fd; + fd_set readfds; + fd_set writefds; + fd_set exceptfds; + int select_value; + double delay; + rb_thread_t join; + + int abort; + int priority; + VALUE thgroup; + + st_table *locals; + + VALUE thread; +}; + +#define THREAD_RAISED 0x200 /* temporary flag */ +#define THREAD_TERMINATING 0x400 /* persistent flag */ +#define THREAD_FLAGS_MASK 0x400 /* mask for persistent flags */ + +#define FOREACH_THREAD_FROM(f,x) x = f; do { x = x->next; +#define END_FOREACH_FROM(f,x) } while (x != f) + +#define FOREACH_THREAD(x) FOREACH_THREAD_FROM(curr_thread,x) +#define END_FOREACH(x) END_FOREACH_FROM(curr_thread,x) + +struct thread_status_t { + NODE *node; + + int tracing; + VALUE errinfo; + VALUE last_status; + VALUE last_line; + VALUE last_match; + + int safe; + + enum thread_status status; + int wait_for; + int fd; + fd_set readfds; + fd_set writefds; + fd_set exceptfds; + int select_value; + double delay; + rb_thread_t join; +}; + +#define THREAD_COPY_STATUS(src, dst) (void)( \ + (dst)->node = (src)->node, \ + \ + (dst)->tracing = (src)->tracing, \ + (dst)->errinfo = (src)->errinfo, \ + (dst)->last_status = (src)->last_status, \ + (dst)->last_line = (src)->last_line, \ + (dst)->last_match = (src)->last_match, \ + \ + (dst)->safe = (src)->safe, \ + \ + (dst)->status = (src)->status, \ + (dst)->wait_for = (src)->wait_for, \ + (dst)->fd = (src)->fd, \ + (dst)->readfds = (src)->readfds, \ + (dst)->writefds = (src)->writefds, \ + (dst)->exceptfds = (src)->exceptfds, \ + (dst)->select_value = (src)->select_value, \ + (dst)->delay = (src)->delay, \ + (dst)->join = (src)->join, \ + 0) + +static int +thread_set_raised() +{ + if (curr_thread->flags & THREAD_RAISED) return 1; + curr_thread->flags |= THREAD_RAISED; + return 0; +} + +static int +thread_reset_raised() +{ 
+ if (!(curr_thread->flags & THREAD_RAISED)) return 0; + curr_thread->flags &= ~THREAD_RAISED; + return 1; +} + +static void rb_thread_ready _((rb_thread_t)); + +static VALUE run_trap_eval _((VALUE)); +static VALUE +run_trap_eval(arg) + VALUE arg; +{ + VALUE *p = (VALUE *)arg; + return rb_eval_cmd(p[0], p[1], (int)p[2]); +} + +static VALUE +rb_trap_eval(cmd, sig, safe) + VALUE cmd; + int sig, safe; +{ + int state; + VALUE val = Qnil; /* OK */ + volatile struct thread_status_t save; + VALUE arg[3]; + + arg[0] = cmd; + arg[1] = rb_ary_new3(1, INT2FIX(sig)); + arg[2] = (VALUE)safe; + THREAD_COPY_STATUS(curr_thread, &save); + rb_thread_ready(curr_thread); + PUSH_ITER(ITER_NOT); + val = rb_protect(run_trap_eval, (VALUE)&arg, &state); + POP_ITER(); + THREAD_COPY_STATUS(&save, curr_thread); + + if (state) { + rb_trap_immediate = 0; + JUMP_TAG(state); + } + + if (curr_thread->status == THREAD_STOPPED) { + rb_thread_schedule(); + } + errno = EINTR; + + return val; +} + +static const char * +thread_status_name(status) + enum thread_status status; +{ + switch (status) { + case THREAD_RUNNABLE: + return "run"; + case THREAD_STOPPED: + return "sleep"; + case THREAD_TO_KILL: + return "aborting"; + case THREAD_KILLED: + return "dead"; + default: + return "unknown"; + } +} + +/* $SAFE accessor */ +void +rb_set_safe_level(level) + int level; +{ + if (level > ruby_safe_level) { + if (level > SAFE_LEVEL_MAX) level = SAFE_LEVEL_MAX; + ruby_safe_level = level; + curr_thread->safe = level; + } +} + +static VALUE +safe_getter() +{ + return INT2NUM(ruby_safe_level); +} + +static void +safe_setter(val) + VALUE val; +{ + int level = NUM2INT(val); + + if (level < ruby_safe_level) { + rb_raise(rb_eSecurityError, "tried to downgrade safe level from %d to %d", + ruby_safe_level, level); + } + if (level > SAFE_LEVEL_MAX) level = SAFE_LEVEL_MAX; + ruby_safe_level = level; + curr_thread->safe = level; +} + +/* Return the current time as a floating-point number */ +static double +timeofday() +{ + 
struct timeval tv; + gettimeofday(&tv, NULL); + return (double)tv.tv_sec + (double)tv.tv_usec * 1e-6; +} + +#define STACK(addr) (th->stk_pos<(VALUE*)(addr) && (VALUE*)(addr)<th->stk_pos+th->stk_len) +#define ADJ(addr) (void*)(STACK(addr)?(((VALUE*)(addr)-th->stk_pos)+th->stk_ptr):(VALUE*)(addr)) + +static void +thread_mark(th) + rb_thread_t th; +{ + struct FRAME *frame; + struct BLOCK *block; + + rb_gc_mark(th->result); + rb_gc_mark(th->thread); + if (th->join) rb_gc_mark(th->join->thread); + + rb_gc_mark(th->klass); + rb_gc_mark(th->wrapper); + rb_gc_mark((VALUE)th->cref); + + rb_gc_mark((VALUE)th->scope); + rb_gc_mark((VALUE)th->dyna_vars); + rb_gc_mark(th->errinfo); + rb_gc_mark(th->last_line); + rb_gc_mark(th->last_match); + rb_mark_tbl(th->locals); + rb_gc_mark(th->thgroup); + + /* mark data in copied stack */ + if (th == curr_thread) return; + if (th->status == THREAD_KILLED) return; + if (th->stk_len == 0) return; /* stack not active, no need to mark. */ + if (th->stk_ptr) { + rb_gc_mark_locations(th->stk_ptr, th->stk_ptr+th->stk_len); +#if defined(THINK_C) || defined(__human68k__) + rb_gc_mark_locations(th->stk_ptr+2, th->stk_ptr+th->stk_len+2); +#endif +#ifdef __ia64__ + if (th->bstr_ptr) { + rb_gc_mark_locations(th->bstr_ptr, th->bstr_ptr+th->bstr_len); + } +#endif + } + frame = th->frame; + while (frame && frame != top_frame) { + frame = ADJ(frame); + rb_gc_mark_frame(frame); + if (frame->tmp) { + struct FRAME *tmp = frame->tmp; + + while (tmp && tmp != top_frame) { + tmp = ADJ(tmp); + rb_gc_mark_frame(tmp); + tmp = tmp->prev; + } + } + frame = frame->prev; + } + block = th->block; + while (block) { + block = ADJ(block); + rb_gc_mark_frame(&block->frame); + block = block->prev; + } +} + +static struct { + rb_thread_t thread; + VALUE proc, arg; +} new_thread; + +void +rb_gc_mark_threads() +{ + rb_thread_t th; + + /* static global mark */ + rb_gc_mark((VALUE)ruby_cref); + + if (!curr_thread) return; + FOREACH_THREAD(th) { + rb_gc_mark(th->thread); + } 
END_FOREACH(th); + if (new_thread.thread) { + rb_gc_mark(new_thread.thread->thread); + rb_gc_mark(new_thread.proc); + rb_gc_mark(new_thread.arg); + } +} + +static void +thread_free(th) + rb_thread_t th; +{ + if (th->stk_ptr) free(th->stk_ptr); + th->stk_ptr = 0; +#ifdef __ia64__ + if (th->bstr_ptr) free(th->bstr_ptr); + th->bstr_ptr = 0; +#endif + if (th->locals) st_free_table(th->locals); + if (th->status != THREAD_KILLED) { + if (th->prev) th->prev->next = th->next; + if (th->next) th->next->prev = th->prev; + } + if (th != main_thread) free(th); +} + +static rb_thread_t +rb_thread_check(data) + VALUE data; +{ + if (TYPE(data) != T_DATA || RDATA(data)->dmark != (RUBY_DATA_FUNC)thread_mark) { + rb_raise(rb_eTypeError, "wrong argument type %s (expected Thread)", + rb_obj_classname(data)); + } + return (rb_thread_t)RDATA(data)->data; +} + +static VALUE rb_thread_raise _((int, VALUE*, rb_thread_t)); + +static VALUE th_raise_exception; +static NODE *th_raise_node; +static VALUE th_cmd; +static int th_sig, th_safe; +static char *th_signm; + +#define RESTORE_NORMAL 1 +#define RESTORE_FATAL 2 +#define RESTORE_INTERRUPT 3 +#define RESTORE_TRAP 4 +#define RESTORE_RAISE 5 +#define RESTORE_SIGNAL 6 +#define RESTORE_EXIT 7 + +extern VALUE *rb_gc_stack_start; + +static void +rb_thread_save_context(th) + rb_thread_t th; +{ + VALUE *pos; + int len; + static VALUE tval; + + len = ruby_stack_length(&pos); + th->stk_len = 0; + th->stk_pos = pos; + if (len > th->stk_max) { + REALLOC_N(th->stk_ptr, VALUE, len); + th->stk_max = len; + } + th->stk_len = len; + FLUSH_REGISTER_WINDOWS; + MEMCPY(th->stk_ptr, th->stk_pos, VALUE, th->stk_len); +#ifdef __ia64__ + { + VALUE *top, *bot; +#ifdef HAVE_UNWIND_H + _Unwind_Context *unwctx = _UNW_createContextForSelf(); + + _UNW_currentContext(unwctx); + bot = (VALUE*)(long)_UNW_getAR(unwctx, _UNW_AR_BSP); + top = (VALUE*)(long)_UNW_getAR(unwctx, _UNW_AR_BSPSTORE); + _UNW_destroyContext(unwctx); +#else + ucontext_t ctx; + + getcontext(&ctx); + bot = 
(VALUE*)__libc_ia64_register_backing_store_base; + top = (VALUE*)ctx.uc_mcontext.IA64_BSPSTORE; +#endif + th->bstr_len = top - bot; + REALLOC_N(th->bstr_ptr, VALUE, th->bstr_len); + MEMCPY(th->bstr_ptr, bot, VALUE, th->bstr_len); + } +#endif +#ifdef SAVE_WIN32_EXCEPTION_LIST + th->win32_exception_list = win32_get_exception_list(); +#endif + + th->frame = ruby_frame; + th->scope = ruby_scope; + th->klass = ruby_class; + th->wrapper = ruby_wrapper; + th->cref = ruby_cref; + th->dyna_vars = ruby_dyna_vars; + th->block = ruby_block; + th->flags &= THREAD_FLAGS_MASK; + th->flags |= (rb_trap_immediate<<8) | scope_vmode; + th->iter = ruby_iter; + th->tag = prot_tag; + th->tracing = tracing; + th->errinfo = ruby_errinfo; + th->last_status = rb_last_status; + tval = rb_lastline_get(); + rb_lastline_set(th->last_line); + th->last_line = tval; + tval = rb_backref_get(); + rb_backref_set(th->last_match); + th->last_match = tval; + th->safe = ruby_safe_level; + + th->node = ruby_current_node; +} + +static int +rb_thread_switch(n) + int n; +{ + rb_trap_immediate = (curr_thread->flags&(1<<8))?1:0; + switch (n) { + case 0: + return 0; + case RESTORE_FATAL: + JUMP_TAG(TAG_FATAL); + break; + case RESTORE_INTERRUPT: + rb_interrupt(); + break; + case RESTORE_TRAP: + rb_trap_eval(th_cmd, th_sig, th_safe); + break; + case RESTORE_RAISE: + ruby_frame->callee = 0; + ruby_frame->this_func = 0; + ruby_current_node = th_raise_node; + rb_raise_jump(th_raise_exception); + break; + case RESTORE_SIGNAL: + rb_raise(rb_eSignal, "SIG%s", th_signm); + break; + case RESTORE_EXIT: + ruby_errinfo = th_raise_exception; + ruby_current_node = th_raise_node; + error_print(); + terminate_process(EXIT_FAILURE, 0, 0); + break; + case RESTORE_NORMAL: + default: + break; + } + return 1; +} + +#define THREAD_SAVE_CONTEXT(th) \ + (rb_thread_save_context(th),\ + rb_thread_switch((FLUSH_REGISTER_WINDOWS, setjmp((th)->context)))) + +NORETURN(static void rb_thread_restore_context _((rb_thread_t,int))); 
+NOINLINE(static void stack_extend _((rb_thread_t, int))); + +static void +stack_extend(th, exit) + rb_thread_t th; + int exit; +{ + VALUE space[1024]; + + memset(space, 0, 1); /* prevent array from optimization */ + rb_thread_restore_context(th, exit); +} + +static void +rb_thread_restore_context(th, exit) + rb_thread_t th; + int exit; +{ + VALUE v; + static rb_thread_t tmp; + static int ex; + static VALUE tval; + + if (!th->stk_ptr) rb_bug("unsaved context"); + +#if STACK_GROW_DIRECTION < 0 + if (&v > th->stk_pos) stack_extend(th, exit); +#elif STACK_GROW_DIRECTION > 0 + if (&v < th->stk_pos + th->stk_len) stack_extend(th, exit); +#else + if (&v < rb_gc_stack_start) { + /* Stack grows downward */ + if (&v > th->stk_pos) stack_extend(th, exit); + } + else { + /* Stack grows upward */ + if (&v < th->stk_pos + th->stk_len) stack_extend(th, exit); + } +#endif + + rb_trap_immediate = 0; /* inhibit interrupts from here */ + ruby_frame = th->frame; + ruby_scope = th->scope; + ruby_class = th->klass; + ruby_wrapper = th->wrapper; + ruby_cref = th->cref; + ruby_dyna_vars = th->dyna_vars; + ruby_block = th->block; + scope_vmode = th->flags&SCOPE_MASK; + ruby_iter = th->iter; + prot_tag = th->tag; + tracing = th->tracing; + ruby_errinfo = th->errinfo; + rb_last_status = th->last_status; + ruby_safe_level = th->safe; + + ruby_current_node = th->node; + +#ifdef SAVE_WIN32_EXCEPTION_LIST + win32_set_exception_list(th->win32_exception_list); +#endif + tmp = th; + ex = exit; + FLUSH_REGISTER_WINDOWS; + MEMCPY(tmp->stk_pos, tmp->stk_ptr, VALUE, tmp->stk_len); +#ifdef __ia64__ + { + VALUE *base; +#ifdef HAVE_UNWIND_H + _Unwind_Context *unwctx = _UNW_createContextForSelf(); + + _UNW_currentContext(unwctx); + base = (VALUE*)(long)_UNW_getAR(unwctx, _UNW_AR_BSP); + _UNW_destroyContext(unwctx); +#else + base = (VALUE*)__libc_ia64_register_backing_store_base; +#endif + MEMCPY(base, tmp->bstr_ptr, VALUE, tmp->bstr_len); + } +#endif + + tval = rb_lastline_get(); + 
rb_lastline_set(tmp->last_line); + tmp->last_line = tval; + tval = rb_backref_get(); + rb_backref_set(tmp->last_match); + tmp->last_match = tval; + + longjmp(tmp->context, ex); +} + +static void +rb_thread_ready(th) + rb_thread_t th; +{ + th->wait_for = 0; + if (th->status != THREAD_TO_KILL) { + th->status = THREAD_RUNNABLE; + } +} + +static void +rb_thread_die(th) + rb_thread_t th; +{ + th->thgroup = 0; + th->status = THREAD_KILLED; + if (th->stk_ptr) free(th->stk_ptr); + th->stk_ptr = 0; +} + +static void +rb_thread_remove(th) + rb_thread_t th; +{ + if (th->status == THREAD_KILLED) return; + + rb_thread_ready(th); + rb_thread_die(th); + th->prev->next = th->next; + th->next->prev = th->prev; +} + +static int +rb_thread_dead(th) + rb_thread_t th; +{ + return th->status == THREAD_KILLED; +} + +void +rb_thread_fd_close(fd) + int fd; +{ + rb_thread_t th; + + FOREACH_THREAD(th) { + if (((th->wait_for & WAIT_FD) && fd == th->fd) || + ((th->wait_for & WAIT_SELECT) && (fd < th->fd) && + (FD_ISSET(fd, &th->readfds) || + FD_ISSET(fd, &th->writefds) || + FD_ISSET(fd, &th->exceptfds)))) { + VALUE exc = rb_exc_new2(rb_eIOError, "stream closed"); + rb_thread_raise(1, &exc, th); + } + } + END_FOREACH(th); +} + +NORETURN(static void rb_thread_main_jump _((VALUE, int))); +static void +rb_thread_main_jump(err, tag) + VALUE err; + int tag; +{ + curr_thread = main_thread; + th_raise_exception = err; + th_raise_node = ruby_current_node; + rb_thread_restore_context(main_thread, tag); +} + +NORETURN(static void rb_thread_deadlock _((void))); +static void +rb_thread_deadlock() +{ + char msg[21+SIZEOF_LONG*2]; + VALUE e; + + sprintf(msg, "Thread(0x%lx): deadlock", curr_thread->thread); + e = rb_exc_new2(rb_eFatal, msg); + if (curr_thread == main_thread) { + rb_exc_raise(e); + } + rb_thread_main_jump(e, RESTORE_RAISE); +} + +static void +copy_fds(dst, src, max) + fd_set *dst, *src; + int max; +{ + int n = 0; + int i; + + for (i=0; i<=max; i++) { + if (FD_ISSET(i, src)) { + n = i; + 
FD_SET(i, dst); + } + } +} + +static int +match_fds(dst, src, max) + fd_set *dst, *src; + int max; +{ + int i; + + for (i=0; i<=max; i++) { + if (FD_ISSET(i, src) && FD_ISSET(i, dst)) { + return Qtrue; + } + } + return Qfalse; +} + +static int +intersect_fds(src, dst, max) + fd_set *src, *dst; + int max; +{ + int i, n = 0; + + for (i=0; i<=max; i++) { + if (FD_ISSET(i, dst)) { + if (FD_ISSET(i, src)) { + /* Wake up only one thread per fd. */ + FD_CLR(i, src); + n++; + } + else { + FD_CLR(i, dst); + } + } + } + return n; +} + +static int +find_bad_fds(dst, src, max) + fd_set *dst, *src; + int max; +{ + int i, test = Qfalse; + + for (i=0; i<=max; i++) { + if (FD_ISSET(i, src) && !FD_ISSET(i, dst)) { + FD_CLR(i, src); + test = Qtrue; + } + } + return test; +} + +void +rb_thread_schedule() +{ + rb_thread_t next; /* OK */ + rb_thread_t th; + rb_thread_t curr; + int found = 0; + + fd_set readfds; + fd_set writefds; + fd_set exceptfds; + struct timeval delay_tv, *delay_ptr; + double delay, now; /* OK */ + int n, max; + int need_select = 0; + int select_timeout = 0; + +#ifdef HAVE_NATIVETHREAD + if (!is_ruby_native_thread()) { + rb_bug("cross-thread violation on rb_thread_schedule()"); + } +#endif + rb_thread_pending = 0; + if (curr_thread == curr_thread->next + && curr_thread->status == THREAD_RUNNABLE) + return; + + next = 0; + curr = curr_thread; /* starting thread */ + + while (curr->status == THREAD_KILLED) { + curr = curr->prev; + } + + again: + max = -1; + FD_ZERO(&readfds); + FD_ZERO(&writefds); + FD_ZERO(&exceptfds); + delay = DELAY_INFTY; + now = -1.0; + + FOREACH_THREAD_FROM(curr, th) { + if (!found && th->status <= THREAD_RUNNABLE) { + found = 1; + } + if (th->status != THREAD_STOPPED) continue; + if (th->wait_for & WAIT_JOIN) { + if (rb_thread_dead(th->join)) { + th->status = THREAD_RUNNABLE; + found = 1; + } + } + if (th->wait_for & WAIT_FD) { + FD_SET(th->fd, &readfds); + if (max < th->fd) max = th->fd; + need_select = 1; + } + if (th->wait_for & 
WAIT_SELECT) { + copy_fds(&readfds, &th->readfds, th->fd); + copy_fds(&writefds, &th->writefds, th->fd); + copy_fds(&exceptfds, &th->exceptfds, th->fd); + if (max < th->fd) max = th->fd; + need_select = 1; + if (th->wait_for & WAIT_TIME) { + select_timeout = 1; + } + th->select_value = 0; + } + if (th->wait_for & WAIT_TIME) { + double th_delay; + + if (now < 0.0) now = timeofday(); + th_delay = th->delay - now; + if (th_delay <= 0.0) { + th->status = THREAD_RUNNABLE; + found = 1; + } + else if (th_delay < delay) { + delay = th_delay; + need_select = 1; + } + else if (th->delay == DELAY_INFTY) { + need_select = 1; + } + } + } + END_FOREACH_FROM(curr, th); + + /* Do the select if needed */ + if (need_select) { + /* Convert delay to a timeval */ + /* If a thread is runnable, just poll */ + if (found) { + delay_tv.tv_sec = 0; + delay_tv.tv_usec = 0; + delay_ptr = &delay_tv; + } + else if (delay == DELAY_INFTY) { + delay_ptr = 0; + } + else { + delay_tv.tv_sec = delay; + delay_tv.tv_usec = (delay - (double)delay_tv.tv_sec)*1e6; + delay_ptr = &delay_tv; + } + + n = select(max+1, &readfds, &writefds, &exceptfds, delay_ptr); + if (n < 0) { + int e = errno; + + if (rb_trap_pending) rb_trap_exec(); + if (e == EINTR) goto again; +#ifdef ERESTART + if (e == ERESTART) goto again; +#endif + FOREACH_THREAD_FROM(curr, th) { + if (th->wait_for & WAIT_SELECT) { + int v = 0; + + v |= find_bad_fds(&readfds, &th->readfds, th->fd); + v |= find_bad_fds(&writefds, &th->writefds, th->fd); + v |= find_bad_fds(&exceptfds, &th->exceptfds, th->fd); + if (v) { + th->select_value = n; + n = max; + } + } + } + END_FOREACH_FROM(curr, th); + } + if (select_timeout && n == 0) { + if (now < 0.0) now = timeofday(); + FOREACH_THREAD_FROM(curr, th) { + if (((th->wait_for&(WAIT_SELECT|WAIT_TIME)) == (WAIT_SELECT|WAIT_TIME)) && + th->delay <= now) { + th->status = THREAD_RUNNABLE; + th->wait_for = 0; + th->select_value = 0; + found = 1; + intersect_fds(&readfds, &th->readfds, max); + 
intersect_fds(&writefds, &th->writefds, max); + intersect_fds(&exceptfds, &th->exceptfds, max); + } + } + END_FOREACH_FROM(curr, th); + } + if (n > 0) { + now = -1.0; + /* Some descriptors are ready. + Make the corresponding threads runnable. */ + FOREACH_THREAD_FROM(curr, th) { + if ((th->wait_for&WAIT_FD) && FD_ISSET(th->fd, &readfds)) { + /* Wake up only one thread per fd. */ + FD_CLR(th->fd, &readfds); + th->status = THREAD_RUNNABLE; + th->fd = 0; + th->wait_for = 0; + found = 1; + } + if ((th->wait_for&WAIT_SELECT) && + (match_fds(&readfds, &th->readfds, max) || + match_fds(&writefds, &th->writefds, max) || + match_fds(&exceptfds, &th->exceptfds, max))) { + /* Wake up only one thread per fd. */ + th->status = THREAD_RUNNABLE; + th->wait_for = 0; + n = intersect_fds(&readfds, &th->readfds, max) + + intersect_fds(&writefds, &th->writefds, max) + + intersect_fds(&exceptfds, &th->exceptfds, max); + th->select_value = n; + found = 1; + } + } + END_FOREACH_FROM(curr, th); + } + /* The delays for some of the threads should have expired. + Go through the loop once more, to check the delays. */ + if (!found && delay != DELAY_INFTY) + goto again; + } + + FOREACH_THREAD_FROM(curr, th) { + if (th->status == THREAD_TO_KILL) { + next = th; + break; + } + if (th->status == THREAD_RUNNABLE && th->stk_ptr) { + if (!next || next->priority < th->priority) + next = th; + } + } + END_FOREACH_FROM(curr, th); + + if (!next) { + /* raise fatal error to main thread */ + curr_thread->node = ruby_current_node; + if (curr->next == curr) { + TRAP_BEG; + pause(); + TRAP_END; + } + FOREACH_THREAD_FROM(curr, th) { + warn_printf("deadlock 0x%lx: %s:", + th->thread, thread_status_name(th->status)); + if (th->wait_for & WAIT_FD) warn_printf("F(%d)", th->fd); + if (th->wait_for & WAIT_SELECT) warn_printf("S"); + if (th->wait_for & WAIT_TIME) warn_printf("T(%f)", th->delay); + if (th->wait_for & WAIT_JOIN) + warn_printf("J(0x%lx)", th->join ? 
th->join->thread : 0); + if (th->wait_for & WAIT_PID) warn_printf("P"); + if (!th->wait_for) warn_printf("-"); + warn_printf(" %s - %s:%d\n", + th==main_thread ? "(main)" : "", + th->node->nd_file, nd_line(th->node)); + } + END_FOREACH_FROM(curr, th); + next = main_thread; + rb_thread_ready(next); + next->status = THREAD_TO_KILL; + if (!rb_thread_dead(curr_thread)) { + rb_thread_save_context(curr_thread); + } + rb_thread_deadlock(); + } + next->wait_for = 0; + if (next->status == THREAD_RUNNABLE && next == curr_thread) { + return; + } + + /* context switch */ + if (curr == curr_thread) { + if (THREAD_SAVE_CONTEXT(curr)) { + return; + } + } + + curr_thread = next; + if (next->status == THREAD_TO_KILL) { + if (!(next->flags & THREAD_TERMINATING)) { + next->flags |= THREAD_TERMINATING; + /* terminate; execute ensure-clause if any */ + rb_thread_restore_context(next, RESTORE_FATAL); + } + } + rb_thread_restore_context(next, RESTORE_NORMAL); +} + +void +rb_thread_wait_fd(fd) + int fd; +{ + if (rb_thread_critical) return; + if (curr_thread == curr_thread->next) return; + if (curr_thread->status == THREAD_TO_KILL) return; + + curr_thread->status = THREAD_STOPPED; + curr_thread->fd = fd; + curr_thread->wait_for = WAIT_FD; + rb_thread_schedule(); +} + +int +rb_thread_fd_writable(fd) + int fd; +{ + if (rb_thread_critical) return Qtrue; + if (curr_thread == curr_thread->next) return Qtrue; + if (curr_thread->status == THREAD_TO_KILL) return Qtrue; + + curr_thread->status = THREAD_STOPPED; + FD_ZERO(&curr_thread->readfds); + FD_ZERO(&curr_thread->writefds); + FD_SET(fd, &curr_thread->writefds); + FD_ZERO(&curr_thread->exceptfds); + curr_thread->fd = fd+1; + curr_thread->wait_for = WAIT_SELECT; + rb_thread_schedule(); + return Qfalse; +} + +void +rb_thread_wait_for(time) + struct timeval time; +{ + double date; + + if (rb_thread_critical || + curr_thread == curr_thread->next || + curr_thread->status == THREAD_TO_KILL) { + int n; + int thr_critical = rb_thread_critical; +#ifndef 
linux + double d, limit; + limit = timeofday()+(double)time.tv_sec+(double)time.tv_usec*1e-6; +#endif + for (;;) { + rb_thread_critical = Qtrue; + TRAP_BEG; + n = select(0, 0, 0, 0, &time); + rb_thread_critical = thr_critical; + TRAP_END; + if (n == 0) return; + if (n < 0) { + switch (errno) { + case EINTR: +#ifdef ERESTART + case ERESTART: +#endif + return; + default: + rb_sys_fail("sleep"); + } + } +#ifndef linux + d = limit - timeofday(); + + time.tv_sec = (int)d; + time.tv_usec = (int)((d - (int)d)*1e6); + if (time.tv_usec < 0) { + time.tv_usec += (long)1e6; + time.tv_sec -= 1; + } + if (time.tv_sec < 0) return; +#endif + } + } + + date = timeofday() + (double)time.tv_sec + (double)time.tv_usec*1e-6; + curr_thread->status = THREAD_STOPPED; + curr_thread->delay = date; + curr_thread->wait_for = WAIT_TIME; + rb_thread_schedule(); +} + +void rb_thread_sleep_forever _((void)); + +int +rb_thread_alone() +{ + return curr_thread == curr_thread->next; +} + +int +rb_thread_select(max, read, write, except, timeout) + int max; + fd_set *read, *write, *except; + struct timeval *timeout; +{ + double limit; + int n; + + if (!read && !write && !except) { + if (!timeout) { + rb_thread_sleep_forever(); + return 0; + } + rb_thread_wait_for(*timeout); + return 0; + } + + if (timeout) { + limit = timeofday()+ + (double)timeout->tv_sec+(double)timeout->tv_usec*1e-6; + } + + if (rb_thread_critical || + curr_thread == curr_thread->next || + curr_thread->status == THREAD_TO_KILL) { +#ifndef linux + struct timeval tv, *tvp = timeout; + + if (timeout) { + tv = *timeout; + tvp = &tv; + } +#else + struct timeval *const tvp = timeout; +#endif + for (;;) { + TRAP_BEG; + n = select(max, read, write, except, tvp); + TRAP_END; + if (n < 0) { + switch (errno) { + case EINTR: +#ifdef ERESTART + case ERESTART: +#endif +#ifndef linux + if (timeout) { + double d = limit - timeofday(); + + tv.tv_sec = (unsigned int)d; + tv.tv_usec = (long)((d-(double)tv.tv_sec)*1e6); + if (tv.tv_sec < 0) tv.tv_sec = 
0; + if (tv.tv_usec < 0) tv.tv_usec = 0; + } +#endif + continue; + default: + break; + } + } + return n; + } + } + + curr_thread->status = THREAD_STOPPED; + if (read) curr_thread->readfds = *read; + else FD_ZERO(&curr_thread->readfds); + if (write) curr_thread->writefds = *write; + else FD_ZERO(&curr_thread->writefds); + if (except) curr_thread->exceptfds = *except; + else FD_ZERO(&curr_thread->exceptfds); + curr_thread->fd = max; + curr_thread->wait_for = WAIT_SELECT; + if (timeout) { + curr_thread->delay = timeofday() + + (double)timeout->tv_sec + (double)timeout->tv_usec*1e-6; + curr_thread->wait_for |= WAIT_TIME; + } + rb_thread_schedule(); + if (read) *read = curr_thread->readfds; + if (write) *write = curr_thread->writefds; + if (except) *except = curr_thread->exceptfds; + return curr_thread->select_value; +} + +static int rb_thread_join _((rb_thread_t, double)); + +static int +rb_thread_join(th, limit) + rb_thread_t th; + double limit; +{ + enum thread_status last_status = THREAD_RUNNABLE; + + if (rb_thread_critical) rb_thread_deadlock(); + if (!rb_thread_dead(th)) { + if (th == curr_thread) + rb_raise(rb_eThreadError, "thread 0x%lx tried to join itself", + th->thread); + if ((th->wait_for & WAIT_JOIN) && th->join == curr_thread) + rb_raise(rb_eThreadError, "Thread#join: deadlock 0x%lx - mutual join(0x%lx)", + curr_thread->thread, th->thread); + if (curr_thread->status == THREAD_TO_KILL) + last_status = THREAD_TO_KILL; + if (limit == 0) return Qfalse; + curr_thread->status = THREAD_STOPPED; + curr_thread->join = th; + curr_thread->wait_for = WAIT_JOIN; + curr_thread->delay = timeofday() + limit; + if (limit < DELAY_INFTY) curr_thread->wait_for |= WAIT_TIME; + rb_thread_schedule(); + curr_thread->status = last_status; + if (!rb_thread_dead(th)) return Qfalse; + } + + if (!NIL_P(th->errinfo) && (th->flags & THREAD_RAISED)) { + VALUE oldbt = get_backtrace(th->errinfo); + VALUE errat = make_backtrace(); + VALUE errinfo = rb_obj_dup(th->errinfo); + + if 
(TYPE(oldbt) == T_ARRAY && RARRAY(oldbt)->len > 0) { + rb_ary_unshift(errat, rb_ary_entry(oldbt, 0)); + } + set_backtrace(errinfo, errat); + rb_exc_raise(errinfo); + } + + return Qtrue; +} + + +/* + * call-seq: + * thr.join => thr + * thr.join(limit) => thr + * + * The calling thread will suspend execution and run <i>thr</i>. Does not + * return until <i>thr</i> exits or until <i>limit</i> seconds have passed. If + * the time limit expires, <code>nil</code> will be returned, otherwise + * <i>thr</i> is returned. + * + * Any threads not joined will be killed when the main program exits. If + * <i>thr</i> had previously raised an exception and the + * <code>abort_on_exception</code> and <code>$DEBUG</code> flags are not set + * (so the exception has not yet been processed) it will be processed at this + * time. + * + * a = Thread.new { print "a"; sleep(10); print "b"; print "c" } + * x = Thread.new { print "x"; Thread.pass; print "y"; print "z" } + * x.join # Let x thread finish, a will be killed on exit. + * + * <em>produces:</em> + * + * axyz + * + * The following example illustrates the <i>limit</i> parameter. + * + * y = Thread.new { 4.times { sleep 0.1; puts 'tick... ' }} + * puts "Waiting" until y.join(0.15) + * + * <em>produces:</em> + * + * tick... + * Waiting + * tick... + * Waitingtick... + * + * + * tick... + */ + +static VALUE +rb_thread_join_m(argc, argv, thread) + int argc; + VALUE *argv; + VALUE thread; +{ + VALUE limit; + double delay = DELAY_INFTY; + rb_thread_t th = rb_thread_check(thread); + + rb_scan_args(argc, argv, "01", &limit); + if (!NIL_P(limit)) delay = rb_num2dbl(limit); + if (!rb_thread_join(th, delay)) + return Qnil; + return thread; +} + + +/* + * call-seq: + * Thread.current => thread + * + * Returns the currently executing thread. 
+ * + * Thread.current #=> #<Thread:0x401bdf4c run> + */ + +VALUE +rb_thread_current() +{ + return curr_thread->thread; +} + + +/* + * call-seq: + * Thread.main => thread + * + * Returns the main thread for the process. + * + * Thread.main #=> #<Thread:0x401bdf4c run> + */ + +VALUE +rb_thread_main() +{ + return main_thread->thread; +} + + +/* + * call-seq: + * Thread.list => array + * + * Returns an array of <code>Thread</code> objects for all threads that are + * either runnable or stopped. + * + * Thread.new { sleep(200) } + * Thread.new { 1000000.times {|i| i*i } } + * Thread.new { Thread.stop } + * Thread.list.each {|t| p t} + * + * <em>produces:</em> + * + * #<Thread:0x401b3e84 sleep> + * #<Thread:0x401b3f38 run> + * #<Thread:0x401b3fb0 sleep> + * #<Thread:0x401bdf4c run> + */ + +VALUE +rb_thread_list() +{ + rb_thread_t th; + VALUE ary = rb_ary_new(); + + FOREACH_THREAD(th) { + switch (th->status) { + case THREAD_RUNNABLE: + case THREAD_STOPPED: + case THREAD_TO_KILL: + rb_ary_push(ary, th->thread); + default: + break; + } + } + END_FOREACH(th); + + return ary; +} + + +/* + * call-seq: + * thr.wakeup => thr + * + * Marks <i>thr</i> as eligible for scheduling (it may still remain blocked on + * I/O, however). Does not invoke the scheduler (see <code>Thread#run</code>). + * + * c = Thread.new { Thread.stop; puts "hey!" } + * c.wakeup + * + * <em>produces:</em> + * + * hey! + */ + +VALUE +rb_thread_wakeup(thread) + VALUE thread; +{ + rb_thread_t th = rb_thread_check(thread); + + if (th->status == THREAD_KILLED) + rb_raise(rb_eThreadError, "killed thread"); + rb_thread_ready(th); + + return thread; +} + + +/* + * call-seq: + * thr.run => thr + * + * Wakes up <i>thr</i>, making it eligible for scheduling. If not in a critical + * section, then invokes the scheduler. 
+ * + * a = Thread.new { puts "a"; Thread.stop; puts "c" } + * Thread.pass + * puts "Got here" + * a.run + * a.join + * + * <em>produces:</em> + * + * a + * Got here + * c + */ + +VALUE +rb_thread_run(thread) + VALUE thread; +{ + rb_thread_wakeup(thread); + if (!rb_thread_critical) rb_thread_schedule(); + + return thread; +} + + +/* + * call-seq: + * thr.exit => thr or nil + * thr.kill => thr or nil + * thr.terminate => thr or nil + * + * Terminates <i>thr</i> and schedules another thread to be run. If this thread + * is already marked to be killed, <code>exit</code> returns the + * <code>Thread</code>. If this is the main thread, or the last thread, exits + * the process. + */ + +VALUE +rb_thread_kill(thread) + VALUE thread; +{ + rb_thread_t th = rb_thread_check(thread); + + if (th != curr_thread && th->safe < 4) { + rb_secure(4); + } + if (th->status == THREAD_TO_KILL || th->status == THREAD_KILLED) + return thread; + if (th == th->next || th == main_thread) rb_exit(EXIT_SUCCESS); + + rb_thread_ready(th); + th->status = THREAD_TO_KILL; + if (!rb_thread_critical) rb_thread_schedule(); + return thread; +} + + +/* + * call-seq: + * Thread.kill(thread) => thread + * + * Causes the given <em>thread</em> to exit (see <code>Thread::exit</code>). + * + * count = 0 + * a = Thread.new { loop { count += 1 } } + * sleep(0.1) #=> 0 + * Thread.kill(a) #=> #<Thread:0x401b3d30 dead> + * count #=> 93947 + * a.alive? #=> false + */ + +static VALUE +rb_thread_s_kill(obj, th) + VALUE obj, th; +{ + return rb_thread_kill(th); +} + + +/* + * call-seq: + * Thread.exit => thread + * + * Terminates the currently running thread and schedules another thread to be + * run. If this thread is already marked to be killed, <code>exit</code> + * returns the <code>Thread</code>. If this is the main thread, or the last + * thread, exit the process. 
+ */ + +static VALUE +rb_thread_exit() +{ + return rb_thread_kill(curr_thread->thread); +} + + +/* + * call-seq: + * Thread.pass => nil + * + * Invokes the thread scheduler to pass execution to another thread. + * + * a = Thread.new { print "a"; Thread.pass; + * print "b"; Thread.pass; + * print "c" } + * b = Thread.new { print "x"; Thread.pass; + * print "y"; Thread.pass; + * print "z" } + * a.join + * b.join + * + * <em>produces:</em> + * + * axbycz + */ + +static VALUE +rb_thread_pass() +{ + rb_thread_schedule(); + return Qnil; +} + + +/* + * call-seq: + * Thread.stop => nil + * + * Stops execution of the current thread, putting it into a ``sleep'' state, + * and schedules execution of another thread. Resets the ``critical'' condition + * to <code>false</code>. + * + * a = Thread.new { print "a"; Thread.stop; print "c" } + * Thread.pass + * print "b" + * a.run + * a.join + * + * <em>produces:</em> + * + * abc + */ + +VALUE +rb_thread_stop() +{ + enum thread_status last_status = THREAD_RUNNABLE; + + rb_thread_critical = 0; + if (curr_thread == curr_thread->next) { + rb_raise(rb_eThreadError, "stopping only thread\n\tnote: use sleep to stop forever"); + } + if (curr_thread->status == THREAD_TO_KILL) + last_status = THREAD_TO_KILL; + curr_thread->status = THREAD_STOPPED; + rb_thread_schedule(); + curr_thread->status = last_status; + + return Qnil; +} + +struct timeval rb_time_timeval(); + +void +rb_thread_polling() +{ + if (curr_thread != curr_thread->next) { + curr_thread->status = THREAD_STOPPED; + curr_thread->delay = timeofday() + (double)0.06; + curr_thread->wait_for = WAIT_TIME; + rb_thread_schedule(); + } +} + +void +rb_thread_sleep(sec) + int sec; +{ + if (curr_thread == curr_thread->next) { + TRAP_BEG; + sleep(sec); + TRAP_END; + return; + } + rb_thread_wait_for(rb_time_timeval(INT2FIX(sec))); +} + +void +rb_thread_sleep_forever() +{ + int thr_critical = rb_thread_critical; + if (curr_thread == curr_thread->next || + curr_thread->status == THREAD_TO_KILL) 
{ + rb_thread_critical = Qtrue; + TRAP_BEG; + pause(); + rb_thread_critical = thr_critical; + TRAP_END; + return; + } + + curr_thread->delay = DELAY_INFTY; + curr_thread->wait_for = WAIT_TIME; + curr_thread->status = THREAD_STOPPED; + rb_thread_schedule(); +} + + +/* + * call-seq: + * thr.priority => integer + * + * Returns the priority of <i>thr</i>. Default is zero; higher-priority threads + * will run before lower-priority threads. + * + * Thread.current.priority #=> 0 + */ + +static VALUE +rb_thread_priority(thread) + VALUE thread; +{ + return INT2NUM(rb_thread_check(thread)->priority); +} + + +/* + * call-seq: + * thr.priority= integer => thr + * + * Sets the priority of <i>thr</i> to <i>integer</i>. Higher-priority threads + * will run before lower-priority threads. + * + * count1 = count2 = 0 + * a = Thread.new do + * loop { count1 += 1 } + * end + * a.priority = -1 + * + * b = Thread.new do + * loop { count2 += 1 } + * end + * b.priority = -2 + * sleep 1 #=> 1 + * Thread.critical = 1 + * count1 #=> 622504 + * count2 #=> 5832 + */ + +static VALUE +rb_thread_priority_set(thread, prio) + VALUE thread, prio; +{ + rb_thread_t th; + + rb_secure(4); + th = rb_thread_check(thread); + + th->priority = NUM2INT(prio); + rb_thread_schedule(); + return prio; +} + + +/* + * call-seq: + * thr.safe_level => integer + * + * Returns the safe level in effect for <i>thr</i>. Setting thread-local safe + * levels can help when implementing sandboxes which run insecure code. 
+ * + * thr = Thread.new { $SAFE = 3; sleep } + * Thread.current.safe_level #=> 0 + * thr.safe_level #=> 3 + */ + +static VALUE +rb_thread_safe_level(thread) + VALUE thread; +{ + rb_thread_t th; + + th = rb_thread_check(thread); + if (th == curr_thread) { + return INT2NUM(ruby_safe_level); + } + return INT2NUM(th->safe); +} + +static int ruby_thread_abort; +static VALUE thgroup_default; + + +/* + * call-seq: + * Thread.abort_on_exception => true or false + * + * Returns the status of the global ``abort on exception'' condition. The + * default is <code>false</code>. When set to <code>true</code>, or if the + * global <code>$DEBUG</code> flag is <code>true</code> (perhaps because the + * command line option <code>-d</code> was specified) all threads will abort + * (the process will <code>exit(0)</code>) if an exception is raised in any + * thread. See also <code>Thread::abort_on_exception=</code>. + */ + +static VALUE +rb_thread_s_abort_exc() +{ + return ruby_thread_abort?Qtrue:Qfalse; +} + + +/* + * call-seq: + * Thread.abort_on_exception= boolean => true or false + * + * When set to <code>true</code>, all threads will abort if an exception is + * raised. Returns the new state. + * + * Thread.abort_on_exception = true + * t1 = Thread.new do + * puts "In new thread" + * raise "Exception from thread" + * end + * sleep(1) + * puts "not reached" + * + * <em>produces:</em> + * + * In new thread + * prog.rb:4: Exception from thread (RuntimeError) + * from prog.rb:2:in `initialize' + * from prog.rb:2:in `new' + * from prog.rb:2 + */ + +static VALUE +rb_thread_s_abort_exc_set(self, val) + VALUE self, val; +{ + rb_secure(4); + ruby_thread_abort = RTEST(val); + return val; +} + + +/* + * call-seq: + * thr.abort_on_exception => true or false + * + * Returns the status of the thread-local ``abort on exception'' condition for + * <i>thr</i>. The default is <code>false</code>. See also + * <code>Thread::abort_on_exception=</code>. 
+ */ + +static VALUE +rb_thread_abort_exc(thread) + VALUE thread; +{ + return rb_thread_check(thread)->abort?Qtrue:Qfalse; +} + + +/* + * call-seq: + * thr.abort_on_exception= boolean => true or false + * + * When set to <code>true</code>, causes all threads (including the main + * program) to abort if an exception is raised in <i>thr</i>. The process will + * effectively <code>exit(0)</code>. + */ + +static VALUE +rb_thread_abort_exc_set(thread, val) + VALUE thread, val; +{ + rb_secure(4); + rb_thread_check(thread)->abort = RTEST(val); + return val; +} + + +/* + * call-seq: + * thr.group => thgrp or nil + * + * Returns the <code>ThreadGroup</code> which contains <i>thr</i>, or nil if + * the thread is not a member of any group. + * + * Thread.main.group #=> #<ThreadGroup:0x4029d914> + */ + +VALUE +rb_thread_group(thread) + VALUE thread; +{ + VALUE group = rb_thread_check(thread)->thgroup; + if (!group) { + group = Qnil; + } + return group; +} + +#ifdef __ia64__ +# define IA64_INIT(x) x +#else +# define IA64_INIT(x) +#endif + +#define THREAD_ALLOC(th) do {\ + th = ALLOC(struct thread);\ +\ + th->next = 0;\ + th->prev = 0;\ +\ + th->status = THREAD_RUNNABLE;\ + th->result = 0;\ + th->flags = 0;\ +\ + th->stk_ptr = 0;\ + th->stk_len = 0;\ + th->stk_max = 0;\ + th->wait_for = 0;\ + IA64_INIT(th->bstr_ptr = 0);\ + IA64_INIT(th->bstr_len = 0);\ + FD_ZERO(&th->readfds);\ + FD_ZERO(&th->writefds);\ + FD_ZERO(&th->exceptfds);\ + th->delay = 0.0;\ + th->join = 0;\ +\ + th->frame = 0;\ + th->scope = 0;\ + th->klass = 0;\ + th->wrapper = 0;\ + th->cref = ruby_cref;\ + th->dyna_vars = ruby_dyna_vars;\ + th->block = 0;\ + th->iter = 0;\ + th->tag = 0;\ + th->tracing = 0;\ + th->errinfo = Qnil;\ + th->last_status = 0;\ + th->last_line = 0;\ + th->last_match = Qnil;\ + th->abort = 0;\ + th->priority = 0;\ + th->thgroup = thgroup_default;\ + th->locals = 0;\ + th->thread = 0;\ + th->anchor = 0;\ +} while (0) + +static rb_thread_t +rb_thread_alloc(klass) + VALUE klass; +{ + 
rb_thread_t th; + struct RVarmap *vars; + + THREAD_ALLOC(th); + th->thread = Data_Wrap_Struct(klass, thread_mark, thread_free, th); + + for (vars = th->dyna_vars; vars; vars = vars->next) { + if (FL_TEST(vars, DVAR_DONT_RECYCLE)) break; + FL_SET(vars, DVAR_DONT_RECYCLE); + } + return th; +} + +static int thread_init = 0; + +#if defined(_THREAD_SAFE) +static void +catch_timer(sig) + int sig; +{ +#if !defined(POSIX_SIGNAL) && !defined(BSD_SIGNAL) + signal(sig, catch_timer); +#endif + /* cause EINTR */ +} + +static pthread_t time_thread; + +static void* +thread_timer(dummy) + void *dummy; +{ + for (;;) { +#ifdef HAVE_NANOSLEEP + struct timespec req, rem; + + req.tv_sec = 0; + req.tv_nsec = 10000000; + nanosleep(&req, &rem); +#else + struct timeval tv; + tv.tv_sec = 0; + tv.tv_usec = 10000; + select(0, NULL, NULL, NULL, &tv); +#endif + if (!rb_thread_critical) { + rb_thread_pending = 1; + if (rb_trap_immediate) { + pthread_kill(ruby_thid, SIGVTALRM); + } + } + } +} + +void +rb_thread_start_timer() +{ +} + +void +rb_thread_stop_timer() +{ +} +#elif defined(HAVE_SETITIMER) +static void +catch_timer(sig) + int sig; +{ +#if !defined(POSIX_SIGNAL) && !defined(BSD_SIGNAL) + signal(sig, catch_timer); +#endif + if (!rb_thread_critical) { + rb_thread_pending = 1; + } + /* cause EINTR */ +} + +void +rb_thread_start_timer() +{ + struct itimerval tval; + + if (!thread_init) return; + tval.it_interval.tv_sec = 0; + tval.it_interval.tv_usec = 10000; + tval.it_value = tval.it_interval; + setitimer(ITIMER_VIRTUAL, &tval, NULL); +} + +void +rb_thread_stop_timer() +{ + struct itimerval tval; + + if (!thread_init) return; + tval.it_interval.tv_sec = 0; + tval.it_interval.tv_usec = 0; + tval.it_value = tval.it_interval; + setitimer(ITIMER_VIRTUAL, &tval, NULL); +} +#else /* !(_THREAD_SAFE || HAVE_SETITIMER) */ +int rb_thread_tick = THREAD_TICK; +#endif + +NORETURN(static void rb_thread_terminated _((rb_thread_t, int, enum thread_status))); +static VALUE rb_thread_yield _((VALUE, 
rb_thread_t)); + +static void +push_thread_anchor(ip) + struct ruby_env *ip; +{ + ip->tag = prot_tag; + ip->frame = ruby_frame; + ip->block = ruby_block; + ip->scope = ruby_scope; + ip->iter = ruby_iter; + ip->cref = ruby_cref; + ip->prev = curr_thread->anchor; + curr_thread->anchor = ip; +} + +static void +pop_thread_anchor(ip) + struct ruby_env *ip; +{ + curr_thread->anchor = ip->prev; +} + +static void +thread_insert(th) + rb_thread_t th; +{ + if (!th->next) { + /* merge in thread list */ + th->prev = curr_thread; + curr_thread->next->prev = th; + th->next = curr_thread->next; + curr_thread->next = th; + th->priority = curr_thread->priority; + th->thgroup = curr_thread->thgroup; + } +} + +static VALUE +rb_thread_start_0(fn, arg, th) + VALUE (*fn)(); + void *arg; + rb_thread_t th; +{ + volatile rb_thread_t th_save = th; + volatile VALUE thread = th->thread; + struct BLOCK *volatile saved_block = 0; + enum thread_status status; + int state; + + if (OBJ_FROZEN(curr_thread->thgroup)) { + rb_raise(rb_eThreadError, + "can't start a new thread (frozen ThreadGroup)"); + } + + if (!thread_init) { + thread_init = 1; +#if defined(HAVE_SETITIMER) || defined(_THREAD_SAFE) +#if defined(POSIX_SIGNAL) + posix_signal(SIGVTALRM, catch_timer); +#else + signal(SIGVTALRM, catch_timer); +#endif + +#ifdef _THREAD_SAFE + pthread_create(&time_thread, 0, thread_timer, 0); +#else + rb_thread_start_timer(); +#endif +#endif + } + + if (THREAD_SAVE_CONTEXT(curr_thread)) { + return thread; + } + + if (fn == rb_thread_yield && curr_thread->anchor) { + struct ruby_env *ip = curr_thread->anchor; + new_thread.thread = th; + new_thread.proc = rb_block_proc(); + new_thread.arg = (VALUE)arg; + th->anchor = ip; + thread_insert(th); + curr_thread = th; + longjmp((prot_tag = ip->tag)->buf, TAG_THREAD); + } + + if (ruby_block) { /* should nail down higher blocks */ + struct BLOCK dummy; + + dummy.prev = ruby_block; + blk_copy_prev(&dummy); + saved_block = ruby_block = dummy.prev; + } + 
scope_dup(ruby_scope); + + thread_insert(th); + + PUSH_TAG(PROT_NONE); + if ((state = EXEC_TAG()) == 0) { + if (THREAD_SAVE_CONTEXT(th) == 0) { + curr_thread = th; + th->result = (*fn)(arg, th); + } + th = th_save; + } + else if (TAG_DST()) { + th = th_save; + th->result = prot_tag->retval; + } + POP_TAG(); + status = th->status; + + if (th == main_thread) ruby_stop(state); + rb_thread_remove(th); + + if (saved_block) { + blk_free(saved_block); + } + + rb_thread_terminated(th, state, status); + return 0; /* not reached */ +} + +static void +rb_thread_terminated(th, state, status) + rb_thread_t th; + int state; + enum thread_status status; +{ + if (state && status != THREAD_TO_KILL && !NIL_P(ruby_errinfo)) { + th->flags |= THREAD_RAISED; + if (state == TAG_FATAL) { + /* fatal error within this thread, need to stop whole script */ + main_thread->errinfo = ruby_errinfo; + rb_thread_cleanup(); + } + else if (rb_obj_is_kind_of(ruby_errinfo, rb_eSystemExit)) { + if (th->safe >= 4) { + char buf[32]; + + sprintf(buf, "Insecure exit at level %d", th->safe); + th->errinfo = rb_exc_new2(rb_eSecurityError, buf); + } + else { + /* delegate exception to main_thread */ + rb_thread_main_jump(ruby_errinfo, RESTORE_RAISE); + } + } + else if (th->safe < 4 && (ruby_thread_abort || th->abort || RTEST(ruby_debug))) { + /* exit on main_thread */ + rb_thread_main_jump(ruby_errinfo, RESTORE_EXIT); + } + else { + th->errinfo = ruby_errinfo; + } + } + rb_thread_schedule(); + ruby_stop(0); /* last thread termination */ +} + +static VALUE +rb_thread_yield_0(arg) + VALUE arg; +{ + return rb_thread_yield(arg, curr_thread); +} + +static void +rb_thread_start_1() +{ + rb_thread_t th = new_thread.thread; + volatile rb_thread_t th_save = th; + VALUE proc = new_thread.proc; + VALUE arg = new_thread.arg; + struct ruby_env *ip = th->anchor; + enum thread_status status; + int state; + + ruby_frame = ip->frame; + ruby_block = ip->block; + ruby_scope = ip->scope; + ruby_iter = ip->iter; + ruby_cref = 
ip->cref; + ruby_dyna_vars = ((struct BLOCK *)DATA_PTR(proc))->dyna_vars; + PUSH_FRAME(); + *ruby_frame = *ip->frame; + ruby_frame->prev = ip->frame; + ruby_frame->iter = ITER_CUR; + PUSH_TAG(PROT_NONE); + if ((state = EXEC_TAG()) == 0) { + if (THREAD_SAVE_CONTEXT(th) == 0) { + new_thread.thread = 0; + th->result = rb_block_pass(rb_thread_yield_0, arg, proc); + } + th = th_save; + } + else if (TAG_DST()) { + th = th_save; + th->result = prot_tag->retval; + } + POP_TAG(); + POP_FRAME(); + status = th->status; + + if (th == main_thread) ruby_stop(state); + rb_thread_remove(th); + rb_thread_terminated(th, state, status); +} + +VALUE +rb_thread_create(fn, arg) + VALUE (*fn)(); + void *arg; +{ + Init_stack((VALUE*)&arg); + return rb_thread_start_0(fn, arg, rb_thread_alloc(rb_cThread)); +} + +static VALUE +rb_thread_yield(arg, th) + VALUE arg; + rb_thread_t th; +{ + const ID *tbl; + + scope_dup(ruby_block->scope); + + tbl = ruby_scope->local_tbl; + if (tbl) { + int n = *tbl++; + for (tbl += 2, n -= 2; n > 0; --n) { /* skip first 2 ($_ and $~) */ + ID id = *tbl++; + if (id != 0 && !rb_is_local_id(id)) /* push flip states */ + rb_dvar_push(id, Qfalse); + } + } + rb_dvar_push('_', Qnil); + rb_dvar_push('~', Qnil); + ruby_block->dyna_vars = ruby_dyna_vars; + + return rb_yield_0(arg, 0, 0, YIELD_LAMBDA_CALL, Qtrue); +} + +/* + * call-seq: + * Thread.new([arg]*) {|args| block } => thread + * + * Creates and runs a new thread to execute the instructions given in + * <i>block</i>. Any arguments passed to <code>Thread::new</code> are passed + * into the block. + * + * x = Thread.new { sleep 0.1; print "x"; print "y"; print "z" } + * a = Thread.new { print "a"; print "b"; sleep 0.2; print "c" } + * x.join # Let the threads finish before + * a.join # main thread exits... 
+ * + * <em>produces:</em> + * + * abxyzc + */ + +static VALUE +rb_thread_s_new(argc, argv, klass) + int argc; + VALUE *argv; + VALUE klass; +{ + rb_thread_t th = rb_thread_alloc(klass); + volatile VALUE *pos; + + pos = th->stk_pos; + rb_obj_call_init(th->thread, argc, argv); + if (th->stk_pos == 0) { + rb_raise(rb_eThreadError, "uninitialized thread - check `%s#initialize'", + rb_class2name(klass)); + } + + return th->thread; +} + + +/* + * call-seq: + * Thread.new([arg]*) {|args| block } => thread + * + * Creates and runs a new thread to execute the instructions given in + * <i>block</i>. Any arguments passed to <code>Thread::new</code> are passed + * into the block. + * + * x = Thread.new { sleep 0.1; print "x"; print "y"; print "z" } + * a = Thread.new { print "a"; print "b"; sleep 0.2; print "c" } + * x.join # Let the threads finish before + * a.join # main thread exits... + * + * <em>produces:</em> + * + * abxyzc + */ + +static VALUE +rb_thread_initialize(thread, args) + VALUE thread, args; +{ + rb_thread_t th; + + if (!rb_block_given_p()) { + rb_raise(rb_eThreadError, "must be called with a block"); + } + th = rb_thread_check(thread); + if (th->stk_max) { + NODE *node = th->node; + if (!node) { + rb_raise(rb_eThreadError, "already initialized thread"); + } + rb_raise(rb_eThreadError, "already initialized thread - %s:%d", + node->nd_file, nd_line(node)); + } + return rb_thread_start_0(rb_thread_yield, args, th); +} + + +/* + * call-seq: + * Thread.start([args]*) {|args| block } => thread + * Thread.fork([args]*) {|args| block } => thread + * + * Basically the same as <code>Thread::new</code>. However, if class + * <code>Thread</code> is subclassed, then calling <code>start</code> in that + * subclass will not invoke the subclass's <code>initialize</code> method. 
+ */ + +static VALUE +rb_thread_start(klass, args) + VALUE klass, args; +{ + if (!rb_block_given_p()) { + rb_raise(rb_eThreadError, "must be called with a block"); + } + return rb_thread_start_0(rb_thread_yield, args, rb_thread_alloc(klass)); +} + + +/* + * call-seq: + * thr.value => obj + * + * Waits for <i>thr</i> to complete (via <code>Thread#join</code>) and returns + * its value. + * + * a = Thread.new { 2 + 2 } + * a.value #=> 4 + */ + +static VALUE +rb_thread_value(thread) + VALUE thread; +{ + rb_thread_t th = rb_thread_check(thread); + + while (!rb_thread_join(th, DELAY_INFTY)); + + return th->result; +} + + +/* + * call-seq: + * thr.status => string, false or nil + * + * Returns the status of <i>thr</i>: ``<code>sleep</code>'' if <i>thr</i> is + * sleeping or waiting on I/O, ``<code>run</code>'' if <i>thr</i> is executing, + * ``<code>aborting</code>'' if <i>thr</i> is aborting, <code>false</code> if + * <i>thr</i> terminated normally, and <code>nil</code> if <i>thr</i> + * terminated with an exception. + * + * a = Thread.new { raise("die now") } + * b = Thread.new { Thread.stop } + * c = Thread.new { Thread.exit } + * d = Thread.new { sleep } + * Thread.critical = true + * d.kill #=> #<Thread:0x401b3678 aborting> + * a.status #=> nil + * b.status #=> "sleep" + * c.status #=> false + * d.status #=> "aborting" + * Thread.current.status #=> "run" + */ + +static VALUE +rb_thread_status(thread) + VALUE thread; +{ + rb_thread_t th = rb_thread_check(thread); + + if (rb_thread_dead(th)) { + if (!NIL_P(th->errinfo) && (th->flags & THREAD_RAISED)) + return Qnil; + return Qfalse; + } + + return rb_str_new2(thread_status_name(th->status)); +} + + +/* + * call-seq: + * thr.alive? => true or false + * + * Returns <code>true</code> if <i>thr</i> is running or sleeping. + * + * thr = Thread.new { } + * thr.join #=> #<Thread:0x401b3fb0 dead> + * Thread.current.alive? #=> true + * thr.alive? 
#=> false + */ + +static VALUE +rb_thread_alive_p(thread) + VALUE thread; +{ + rb_thread_t th = rb_thread_check(thread); + + if (rb_thread_dead(th)) return Qfalse; + return Qtrue; +} + + +/* + * call-seq: + * thr.stop? => true or false + * + * Returns <code>true</code> if <i>thr</i> is dead or sleeping. + * + * a = Thread.new { Thread.stop } + * b = Thread.current + * a.stop? #=> true + * b.stop? #=> false + */ + +static VALUE +rb_thread_stop_p(thread) + VALUE thread; +{ + rb_thread_t th = rb_thread_check(thread); + + if (rb_thread_dead(th)) return Qtrue; + if (th->status == THREAD_STOPPED) return Qtrue; + return Qfalse; +} + +static void +rb_thread_wait_other_threads() +{ + rb_thread_t th; + int found; + + /* wait other threads to terminate */ + while (curr_thread != curr_thread->next) { + found = 0; + FOREACH_THREAD(th) { + if (th != curr_thread && th->status != THREAD_STOPPED) { + found = 1; + break; + } + } + END_FOREACH(th); + if (!found) return; + rb_thread_schedule(); + } +} + +static void +rb_thread_cleanup() +{ + rb_thread_t curr, th; + + curr = curr_thread; + while (curr->status == THREAD_KILLED) { + curr = curr->prev; + } + + FOREACH_THREAD_FROM(curr, th) { + if (th->status != THREAD_KILLED) { + rb_thread_ready(th); + if (th != main_thread) { + th->thgroup = 0; + th->priority = 0; + th->status = THREAD_TO_KILL; + RDATA(th->thread)->dfree = NULL; + } + } + } + END_FOREACH_FROM(curr, th); +} + +int rb_thread_critical; + + +/* + * call-seq: + * Thread.critical => true or false + * + * Returns the status of the global ``thread critical'' condition. + */ + +static VALUE +rb_thread_critical_get() +{ + return rb_thread_critical?Qtrue:Qfalse; +} + + +/* + * call-seq: + * Thread.critical= boolean => true or false + * + * Sets the status of the global ``thread critical'' condition and returns + * it. When set to <code>true</code>, prohibits scheduling of any existing + * thread. Does not block new threads from being created and run. 
Certain + * thread operations (such as stopping or killing a thread, sleeping in the + * current thread, and raising an exception) may cause a thread to be scheduled + * even when in a critical section. <code>Thread::critical</code> is not + * intended for daily use: it is primarily there to support folks writing + * threading libraries. + */ + +static VALUE +rb_thread_critical_set(obj, val) + VALUE obj, val; +{ + rb_thread_critical = RTEST(val); + return val; +} + +void +rb_thread_interrupt() +{ + rb_thread_critical = 0; + rb_thread_ready(main_thread); + if (curr_thread == main_thread) { + rb_interrupt(); + } + if (!rb_thread_dead(curr_thread)) { + if (THREAD_SAVE_CONTEXT(curr_thread)) { + return; + } + } + curr_thread = main_thread; + rb_thread_restore_context(curr_thread, RESTORE_INTERRUPT); +} + +void +rb_thread_signal_raise(sig) + char *sig; +{ + if (sig == 0) return; /* should not happen */ + rb_thread_critical = 0; + if (curr_thread == main_thread) { + rb_thread_ready(curr_thread); + rb_raise(rb_eSignal, "SIG%s", sig); + } + rb_thread_ready(main_thread); + if (!rb_thread_dead(curr_thread)) { + if (THREAD_SAVE_CONTEXT(curr_thread)) { + return; + } + } + th_signm = sig; + curr_thread = main_thread; + rb_thread_restore_context(curr_thread, RESTORE_SIGNAL); +} + +void +rb_thread_trap_eval(cmd, sig, safe) + VALUE cmd; + int sig, safe; +{ + rb_thread_critical = 0; + if (curr_thread == main_thread) { + rb_trap_eval(cmd, sig, safe); + return; + } + if (!rb_thread_dead(curr_thread)) { + if (THREAD_SAVE_CONTEXT(curr_thread)) { + return; + } + } + th_cmd = cmd; + th_sig = sig; + th_safe = safe; + curr_thread = main_thread; + rb_thread_restore_context(curr_thread, RESTORE_TRAP); +} + +static VALUE +rb_thread_raise(argc, argv, th) + int argc; + VALUE *argv; + rb_thread_t th; +{ + volatile rb_thread_t th_save = th; + VALUE exc; + + if (!th->next) { + rb_raise(rb_eArgError, "unstarted thread"); + } + if (rb_thread_dead(th)) return Qnil; + exc = rb_make_exception(argc, 
argv); + if (curr_thread == th) { + rb_raise_jump(exc); + } + + if (!rb_thread_dead(curr_thread)) { + if (THREAD_SAVE_CONTEXT(curr_thread)) { + return th_save->thread; + } + } + + rb_thread_ready(th); + curr_thread = th; + + th_raise_exception = exc; + th_raise_node = ruby_current_node; + rb_thread_restore_context(curr_thread, RESTORE_RAISE); + return Qnil; /* not reached */ +} + + +/* + * call-seq: + * thr.raise(exception) + * + * Raises an exception (see <code>Kernel::raise</code>) from <i>thr</i>. The + * caller does not have to be <i>thr</i>. + * + * Thread.abort_on_exception = true + * a = Thread.new { sleep(200) } + * a.raise("Gotcha") + * + * <em>produces:</em> + * + * prog.rb:3: Gotcha (RuntimeError) + * from prog.rb:2:in `initialize' + * from prog.rb:2:in `new' + * from prog.rb:2 + */ + +static VALUE +rb_thread_raise_m(argc, argv, thread) + int argc; + VALUE *argv; + VALUE thread; +{ + rb_thread_t th = rb_thread_check(thread); + + if (ruby_safe_level > th->safe) { + rb_secure(4); + } + rb_thread_raise(argc, argv, th); + return Qnil; /* not reached */ +} + +VALUE +rb_thread_local_aref(thread, id) + VALUE thread; + ID id; +{ + rb_thread_t th; + VALUE val; + + th = rb_thread_check(thread); + if (ruby_safe_level >= 4 && th != curr_thread) { + rb_raise(rb_eSecurityError, "Insecure: thread locals"); + } + if (!th->locals) return Qnil; + if (st_lookup(th->locals, id, &val)) { + return val; + } + return Qnil; +} + + +/* + * call-seq: + * thr[sym] => obj or nil + * + * Attribute Reference---Returns the value of a thread-local variable, using + * either a symbol or a string name. If the specified variable does not exist, + * returns <code>nil</code>. 
+ * + * a = Thread.new { Thread.current["name"] = "A"; Thread.stop } + * b = Thread.new { Thread.current[:name] = "B"; Thread.stop } + * c = Thread.new { Thread.current["name"] = "C"; Thread.stop } + * Thread.list.each {|x| puts "#{x.inspect}: #{x[:name]}" } + * + * <em>produces:</em> + * + * #<Thread:0x401b3b3c sleep>: C + * #<Thread:0x401b3bc8 sleep>: B + * #<Thread:0x401b3c68 sleep>: A + * #<Thread:0x401bdf4c run>: + */ + +static VALUE +rb_thread_aref(thread, id) + VALUE thread, id; +{ + return rb_thread_local_aref(thread, rb_to_id(id)); +} + +VALUE +rb_thread_local_aset(thread, id, val) + VALUE thread; + ID id; + VALUE val; +{ + rb_thread_t th = rb_thread_check(thread); + + if (ruby_safe_level >= 4 && th != curr_thread) { + rb_raise(rb_eSecurityError, "Insecure: can't modify thread locals"); + } + if (OBJ_FROZEN(thread)) rb_error_frozen("thread locals"); + + if (!th->locals) { + th->locals = st_init_numtable(); + } + if (NIL_P(val)) { + st_delete(th->locals, (st_data_t*)&id, 0); + return Qnil; + } + st_insert(th->locals, id, val); + + return val; +} + + +/* + * call-seq: + * thr[sym] = obj => obj + * + * Attribute Assignment---Sets or creates the value of a thread-local variable, + * using either a symbol or a string. See also <code>Thread#[]</code>. + */ + +static VALUE +rb_thread_aset(thread, id, val) + VALUE thread, id, val; +{ + return rb_thread_local_aset(thread, rb_to_id(id), val); +} + + +/* + * call-seq: + * thr.key?(sym) => true or false + * + * Returns <code>true</code> if the given string (or symbol) exists as a + * thread-local variable. 
+ * + * me = Thread.current + * me[:oliver] = "a" + * me.key?(:oliver) #=> true + * me.key?(:stanley) #=> false + */ + +static VALUE +rb_thread_key_p(thread, id) + VALUE thread, id; +{ + rb_thread_t th = rb_thread_check(thread); + + if (!th->locals) return Qfalse; + if (st_lookup(th->locals, rb_to_id(id), 0)) + return Qtrue; + return Qfalse; +} + +static int +thread_keys_i(key, value, ary) + ID key; + VALUE value, ary; +{ + rb_ary_push(ary, ID2SYM(key)); + return ST_CONTINUE; +} + + +/* + * call-seq: + * thr.keys => array + * + * Returns an an array of the names of the thread-local variables (as Symbols). + * + * thr = Thread.new do + * Thread.current[:cat] = 'meow' + * Thread.current["dog"] = 'woof' + * end + * thr.join #=> #<Thread:0x401b3f10 dead> + * thr.keys #=> [:dog, :cat] + */ + +static VALUE +rb_thread_keys(thread) + VALUE thread; +{ + rb_thread_t th = rb_thread_check(thread); + VALUE ary = rb_ary_new(); + + if (th->locals) { + st_foreach(th->locals, thread_keys_i, ary); + } + return ary; +} + +/* + * call-seq: + * thr.inspect => string + * + * Dump the name, id, and status of _thr_ to a string. + */ + +static VALUE +rb_thread_inspect(thread) + VALUE thread; +{ + char *cname = rb_obj_classname(thread); + rb_thread_t th = rb_thread_check(thread); + const char *status = thread_status_name(th->status); + VALUE str; + + str = rb_str_new(0, strlen(cname)+7+16+9+1); /* 7:tags 16:addr 9:status 1:nul */ + sprintf(RSTRING(str)->ptr, "#<%s:0x%lx %s>", cname, thread, status); + RSTRING(str)->len = strlen(RSTRING(str)->ptr); + OBJ_INFECT(str, thread); + + return str; +} + +void +rb_thread_atfork() +{ + rb_thread_t th; + + if (rb_thread_alone()) return; + FOREACH_THREAD(th) { + if (th != curr_thread) { + rb_thread_die(th); + } + } + END_FOREACH(th); + main_thread = curr_thread; + curr_thread->next = curr_thread; + curr_thread->prev = curr_thread; +} + + +/* + * Document-class: Continuation + * + * Continuation objects are generated by + * <code>Kernel#callcc</code>. 
They hold a return address and execution + * context, allowing a nonlocal return to the end of the + * <code>callcc</code> block from anywhere within a program. + * Continuations are somewhat analogous to a structured version of C's + * <code>setjmp/longjmp</code> (although they contain more state, so + * you might consider them closer to threads). + * + * For instance: + * + * arr = [ "Freddie", "Herbie", "Ron", "Max", "Ringo" ] + * callcc{|$cc|} + * puts(message = arr.shift) + * $cc.call unless message =~ /Max/ + * + * <em>produces:</em> + * + * Freddie + * Herbie + * Ron + * Max + * + * This (somewhat contrived) example allows the inner loop to abandon + * processing early: + * + * callcc {|cont| + * for i in 0..4 + * print "\n#{i}: " + * for j in i*5...(i+1)*5 + * cont.call() if j == 17 + * printf "%3d", j + * end + * end + * } + * print "\n" + * + * <em>produces:</em> + * + * 0: 0 1 2 3 4 + * 1: 5 6 7 8 9 + * 2: 10 11 12 13 14 + * 3: 15 16 + */ + +static VALUE rb_cCont; + +/* + * call-seq: + * callcc {|cont| block } => obj + * + * Generates a <code>Continuation</code> object, which it passes to the + * associated block. Performing a <em>cont</em><code>.call</code> will + * cause the <code>callcc</code> to return (as will falling through the + * end of the block). The value returned by the <code>callcc</code> is + * the value of the block, or the value passed to + * <em>cont</em><code>.call</code>. See class <code>Continuation</code> + * for more details. Also see <code>Kernel::throw</code> for + * an alternative mechanism for unwinding a call stack. 
+ */ + +static VALUE +rb_callcc(self) + VALUE self; +{ + volatile VALUE cont; + rb_thread_t th; + volatile rb_thread_t th_save; + struct tag *tag; + struct RVarmap *vars; + + THREAD_ALLOC(th); + cont = Data_Wrap_Struct(rb_cCont, thread_mark, thread_free, th); + + scope_dup(ruby_scope); + for (tag=prot_tag; tag; tag=tag->prev) { + scope_dup(tag->scope); + } + th->thread = curr_thread->thread; + th->thgroup = cont_protect; + + for (vars = ruby_dyna_vars; vars; vars = vars->next) { + if (FL_TEST(vars, DVAR_DONT_RECYCLE)) break; + FL_SET(vars, DVAR_DONT_RECYCLE); + } + th_save = th; + if (THREAD_SAVE_CONTEXT(th)) { + return th_save->result; + } + else { + return rb_yield(cont); + } +} + +/* + * call-seq: + * cont.call(args, ...) + * cont[args, ...] + * + * Invokes the continuation. The program continues from the end of the + * <code>callcc</code> block. If no arguments are given, the original + * <code>callcc</code> returns <code>nil</code>. If one argument is + * given, <code>callcc</code> returns it. Otherwise, an array + * containing <i>args</i> is returned. 
+ * + * callcc {|cont| cont.call } #=> nil + * callcc {|cont| cont.call 1 } #=> 1 + * callcc {|cont| cont.call 1, 2, 3 } #=> [1, 2, 3] + */ + +static VALUE +rb_cont_call(argc, argv, cont) + int argc; + VALUE *argv; + VALUE cont; +{ + rb_thread_t th = rb_thread_check(cont); + + if (th->thread != curr_thread->thread) { + rb_raise(rb_eRuntimeError, "continuation called across threads"); + } + if (th->thgroup != cont_protect) { + rb_raise(rb_eRuntimeError, "continuation called across trap"); + } + switch (argc) { + case 0: + th->result = Qnil; + break; + case 1: + th->result = argv[0]; + break; + default: + th->result = rb_ary_new4(argc, argv); + break; + } + + rb_thread_restore_context(th, RESTORE_NORMAL); + return Qnil; +} + +struct thgroup { + int enclosed; + VALUE group; +}; + + +/* + * Document-class: ThreadGroup + * + * <code>ThreadGroup</code> provides a means of keeping track of a number of + * threads as a group. A <code>Thread</code> can belong to only one + * <code>ThreadGroup</code> at a time; adding a thread to a new group will + * remove it from any previous group. + * + * Newly created threads belong to the same group as the thread from which they + * were created. + */ + +static VALUE thgroup_s_alloc _((VALUE)); +static VALUE +thgroup_s_alloc(klass) + VALUE klass; +{ + VALUE group; + struct thgroup *data; + + group = Data_Make_Struct(klass, struct thgroup, 0, free, data); + data->enclosed = 0; + data->group = group; + + return group; +} + + +/* + * call-seq: + * thgrp.list => array + * + * Returns an array of all existing <code>Thread</code> objects that belong to + * this group. 
+ * + * ThreadGroup::Default.list #=> [#<Thread:0x401bdf4c run>] + */ + +static VALUE +thgroup_list(group) + VALUE group; +{ + struct thgroup *data; + rb_thread_t th; + VALUE ary; + + Data_Get_Struct(group, struct thgroup, data); + ary = rb_ary_new(); + + FOREACH_THREAD(th) { + if (th->thgroup == data->group) { + rb_ary_push(ary, th->thread); + } + } + END_FOREACH(th); + + return ary; +} + + +/* + * call-seq: + * thgrp.enclose => thgrp + * + * Prevents threads from being added to or removed from the receiving + * <code>ThreadGroup</code>. New threads can still be started in an enclosed + * <code>ThreadGroup</code>. + * + * ThreadGroup::Default.enclose #=> #<ThreadGroup:0x4029d914> + * thr = Thread::new { Thread.stop } #=> #<Thread:0x402a7210 sleep> + * tg = ThreadGroup::new #=> #<ThreadGroup:0x402752d4> + * tg.add thr + * + * <em>produces:</em> + * + * ThreadError: can't move from the enclosed thread group + */ + +VALUE +thgroup_enclose(group) + VALUE group; +{ + struct thgroup *data; + + Data_Get_Struct(group, struct thgroup, data); + data->enclosed = 1; + + return group; +} + + +/* + * call-seq: + * thgrp.enclosed? => true or false + * + * Returns <code>true</code> if <em>thgrp</em> is enclosed. See also + * ThreadGroup#enclose. + */ + +static VALUE +thgroup_enclosed_p(group) + VALUE group; +{ + struct thgroup *data; + + Data_Get_Struct(group, struct thgroup, data); + if (data->enclosed) return Qtrue; + return Qfalse; +} + + +/* + * call-seq: + * thgrp.add(thread) => thgrp + * + * Adds the given <em>thread</em> to this group, removing it from any other + * group to which it may have previously belonged. 
+ * + * puts "Initial group is #{ThreadGroup::Default.list}" + * tg = ThreadGroup.new + * t1 = Thread.new { sleep } + * t2 = Thread.new { sleep } + * puts "t1 is #{t1}" + * puts "t2 is #{t2}" + * tg.add(t1) + * puts "Initial group now #{ThreadGroup::Default.list}" + * puts "tg group now #{tg.list}" + * + * <em>produces:</em> + * + * Initial group is #<Thread:0x401bdf4c> + * t1 is #<Thread:0x401b3c90> + * t2 is #<Thread:0x401b3c18> + * Initial group now #<Thread:0x401b3c18>#<Thread:0x401bdf4c> + * tg group now #<Thread:0x401b3c90> + */ + +static VALUE +thgroup_add(group, thread) + VALUE group, thread; +{ + rb_thread_t th; + struct thgroup *data; + + rb_secure(4); + th = rb_thread_check(thread); + if (!th->next || !th->prev) { + rb_raise(rb_eTypeError, "wrong argument type %s (expected Thread)", + rb_obj_classname(thread)); + } + + if (OBJ_FROZEN(group)) { + rb_raise(rb_eThreadError, "can't move to the frozen thread group"); + } + Data_Get_Struct(group, struct thgroup, data); + if (data->enclosed) { + rb_raise(rb_eThreadError, "can't move to the enclosed thread group"); + } + + if (!th->thgroup) { + return Qnil; + } + if (OBJ_FROZEN(th->thgroup)) { + rb_raise(rb_eThreadError, "can't move from the frozen thread group"); + } + Data_Get_Struct(th->thgroup, struct thgroup, data); + if (data->enclosed) { + rb_raise(rb_eThreadError, "can't move from the enclosed thread group"); + } + + th->thgroup = group; + return group; +} + +/* variables for recursive traversals */ +static ID recursive_key; +static VALUE recursive_tbl; + + +/* + * +Thread+ encapsulates the behavior of a thread of + * execution, including the main thread of the Ruby script. + * + * In the descriptions of the methods in this class, the parameter _sym_ + * refers to a symbol, which is either a quoted string or a + * +Symbol+ (such as <code>:name</code>). 
+ */ + +void +Init_Thread() +{ + VALUE cThGroup; + + rb_eThreadError = rb_define_class("ThreadError", rb_eStandardError); + rb_cThread = rb_define_class("Thread", rb_cObject); + rb_undef_alloc_func(rb_cThread); + + rb_define_singleton_method(rb_cThread, "new", rb_thread_s_new, -1); + rb_define_method(rb_cThread, "initialize", rb_thread_initialize, -2); + rb_define_singleton_method(rb_cThread, "start", rb_thread_start, -2); + rb_define_singleton_method(rb_cThread, "fork", rb_thread_start, -2); + + rb_define_singleton_method(rb_cThread, "stop", rb_thread_stop, 0); + rb_define_singleton_method(rb_cThread, "kill", rb_thread_s_kill, 1); + rb_define_singleton_method(rb_cThread, "exit", rb_thread_exit, 0); + rb_define_singleton_method(rb_cThread, "pass", rb_thread_pass, 0); + rb_define_singleton_method(rb_cThread, "current", rb_thread_current, 0); + rb_define_singleton_method(rb_cThread, "main", rb_thread_main, 0); + rb_define_singleton_method(rb_cThread, "list", rb_thread_list, 0); + + rb_define_singleton_method(rb_cThread, "critical", rb_thread_critical_get, 0); + rb_define_singleton_method(rb_cThread, "critical=", rb_thread_critical_set, 1); + + rb_define_singleton_method(rb_cThread, "abort_on_exception", rb_thread_s_abort_exc, 0); + rb_define_singleton_method(rb_cThread, "abort_on_exception=", rb_thread_s_abort_exc_set, 1); + + rb_define_method(rb_cThread, "run", rb_thread_run, 0); + rb_define_method(rb_cThread, "wakeup", rb_thread_wakeup, 0); + rb_define_method(rb_cThread, "kill", rb_thread_kill, 0); + rb_define_method(rb_cThread, "terminate", rb_thread_kill, 0); + rb_define_method(rb_cThread, "exit", rb_thread_kill, 0); + rb_define_method(rb_cThread, "value", rb_thread_value, 0); + rb_define_method(rb_cThread, "status", rb_thread_status, 0); + rb_define_method(rb_cThread, "join", rb_thread_join_m, -1); + rb_define_method(rb_cThread, "alive?", rb_thread_alive_p, 0); + rb_define_method(rb_cThread, "stop?", rb_thread_stop_p, 0); + rb_define_method(rb_cThread, "raise", 
rb_thread_raise_m, -1); + + rb_define_method(rb_cThread, "abort_on_exception", rb_thread_abort_exc, 0); + rb_define_method(rb_cThread, "abort_on_exception=", rb_thread_abort_exc_set, 1); + + rb_define_method(rb_cThread, "priority", rb_thread_priority, 0); + rb_define_method(rb_cThread, "priority=", rb_thread_priority_set, 1); + rb_define_method(rb_cThread, "safe_level", rb_thread_safe_level, 0); + rb_define_method(rb_cThread, "group", rb_thread_group, 0); + + rb_define_method(rb_cThread, "[]", rb_thread_aref, 1); + rb_define_method(rb_cThread, "[]=", rb_thread_aset, 2); + rb_define_method(rb_cThread, "key?", rb_thread_key_p, 1); + rb_define_method(rb_cThread, "keys", rb_thread_keys, 0); + + rb_define_method(rb_cThread, "inspect", rb_thread_inspect, 0); + + rb_cCont = rb_define_class("Continuation", rb_cObject); + rb_undef_alloc_func(rb_cCont); + rb_undef_method(CLASS_OF(rb_cCont), "new"); + rb_define_method(rb_cCont, "call", rb_cont_call, -1); + rb_define_method(rb_cCont, "[]", rb_cont_call, -1); + rb_define_global_function("callcc", rb_callcc, 0); + rb_global_variable(&cont_protect); + + cThGroup = rb_define_class("ThreadGroup", rb_cObject); + rb_define_alloc_func(cThGroup, thgroup_s_alloc); + rb_define_method(cThGroup, "list", thgroup_list, 0); + rb_define_method(cThGroup, "enclose", thgroup_enclose, 0); + rb_define_method(cThGroup, "enclosed?", thgroup_enclosed_p, 0); + rb_define_method(cThGroup, "add", thgroup_add, 1); + thgroup_default = rb_obj_alloc(cThGroup); + rb_define_const(cThGroup, "Default", thgroup_default); + rb_global_variable(&thgroup_default); + + /* allocate main thread */ + main_thread = rb_thread_alloc(rb_cThread); + curr_thread = main_thread->prev = main_thread->next = main_thread; + recursive_key = rb_intern("__recursive_key__"); +} + +/* + * call-seq: + * catch(symbol) {| | block } > obj + * + * +catch+ executes its block. 
If a +throw+ is + * executed, Ruby searches up its stack for a +catch+ block + * with a tag corresponding to the +throw+'s + * _symbol_. If found, that block is terminated, and + * +catch+ returns the value given to +throw+. If + * +throw+ is not called, the block terminates normally, and + * the value of +catch+ is the value of the last expression + * evaluated. +catch+ expressions may be nested, and the + * +throw+ call need not be in lexical scope. + * + * def routine(n) + * puts n + * throw :done if n <= 0 + * routine(n-1) + * end + * + * + * catch(:done) { routine(3) } + * + * <em>produces:</em> + * + * 3 + * 2 + * 1 + * 0 + */ + +static VALUE +rb_f_catch(dmy, tag) + VALUE dmy, tag; +{ + int state; + VALUE val = Qnil; /* OK */ + + tag = ID2SYM(rb_to_id(tag)); + PUSH_TAG(tag); + if ((state = EXEC_TAG()) == 0) { + val = rb_yield_0(tag, 0, 0, 0, Qfalse); + } + else if (state == TAG_THROW && tag == prot_tag->dst) { + val = prot_tag->retval; + state = 0; + } + POP_TAG(); + if (state) JUMP_TAG(state); + + return val; +} + +static VALUE +catch_i(tag) + VALUE tag; +{ + return rb_funcall(Qnil, rb_intern("catch"), 1, tag); +} + +VALUE +rb_catch(tag, func, data) + const char *tag; + VALUE (*func)(); + VALUE data; +{ + return rb_iterate((VALUE(*)_((VALUE)))catch_i, ID2SYM(rb_intern(tag)), func, data); +} + +/* + * call-seq: + * throw(symbol [, obj]) + * + * Transfers control to the end of the active +catch+ block + * waiting for _symbol_. Raises +NameError+ if there + * is no +catch+ block for the symbol. The optional second + * parameter supplies a return value for the +catch+ block, + * which otherwise defaults to +nil+. For examples, see + * <code>Kernel::catch</code>. 
+ */ + +static VALUE +rb_f_throw(argc, argv) + int argc; + VALUE *argv; +{ + VALUE tag, value; + struct tag *tt = prot_tag; + + rb_scan_args(argc, argv, "11", &tag, &value); + tag = ID2SYM(rb_to_id(tag)); + + while (tt) { + if (tt->tag == tag) { + tt->dst = tag; + tt->retval = value; + break; + } + if (tt->tag == PROT_THREAD) { + rb_raise(rb_eThreadError, "uncaught throw `%s' in thread 0x%lx", + rb_id2name(SYM2ID(tag)), + curr_thread); + } + tt = tt->prev; + } + if (!tt) { + rb_name_error(SYM2ID(tag), "uncaught throw `%s'", rb_id2name(SYM2ID(tag))); + } + rb_trap_restore_mask(); + JUMP_TAG(TAG_THROW); +#ifndef __GNUC__ + return Qnil; /* not reached */ +#endif +} + +void +rb_throw(tag, val) + const char *tag; + VALUE val; +{ + VALUE argv[2]; + + argv[0] = ID2SYM(rb_intern(tag)); + argv[1] = val; + rb_f_throw(2, argv); +} + +static VALUE +recursive_check(obj) + VALUE obj; +{ + VALUE hash = rb_thread_local_aref(rb_thread_current(), recursive_key); + + if (NIL_P(hash) || TYPE(hash) != T_HASH) { + return Qfalse; + } + else { + VALUE list = rb_hash_aref(hash, ID2SYM(ruby_frame->this_func)); + + if (NIL_P(list) || TYPE(list) != T_ARRAY) return Qfalse; + return rb_ary_includes(list, rb_obj_id(obj)); + } +} + +static void +recursive_push(obj) + VALUE obj; +{ + VALUE hash = rb_thread_local_aref(rb_thread_current(), recursive_key); + VALUE list, sym; + + sym = ID2SYM(ruby_frame->this_func); + if (NIL_P(hash) || TYPE(hash) != T_HASH) { + hash = rb_hash_new(); + rb_thread_local_aset(rb_thread_current(), recursive_key, hash); + list = Qnil; + } + else { + list = rb_hash_aref(hash, sym); + } + if (NIL_P(list) || TYPE(list) != T_ARRAY) { + list = rb_ary_new(); + rb_hash_aset(hash, sym, list); + } + rb_ary_push(list, rb_obj_id(obj)); +} + +static void +recursive_pop() +{ + VALUE hash = rb_thread_local_aref(rb_thread_current(), recursive_key); + VALUE list, sym; + + sym = ID2SYM(ruby_frame->this_func); + if (NIL_P(hash) || TYPE(hash) != T_HASH) { + VALUE symname = rb_inspect(sym); + 
VALUE thrname = rb_inspect(rb_thread_current()); + rb_raise(rb_eTypeError, "invalid inspect_tbl hash for %s in %s", + StringValuePtr(symname), StringValuePtr(thrname)); + } + list = rb_hash_aref(hash, sym); + if (NIL_P(list) || TYPE(list) != T_ARRAY) { + VALUE symname = rb_inspect(sym); + VALUE thrname = rb_inspect(rb_thread_current()); + rb_raise(rb_eTypeError, "invalid inspect_tbl list for %s in %s", + StringValuePtr(symname), StringValuePtr(thrname)); + } + rb_ary_pop(list); +} + +VALUE +rb_exec_recursive(func, obj, arg) + VALUE (*func)(ANYARGS); /* VALUE obj, VALUE arg, int flag */ + VALUE obj, arg; +{ + if (recursive_check(obj)) { + return (*func)(obj, arg, Qtrue); + } + else { + VALUE result; + int state; + + recursive_push(obj); + PUSH_TAG(PROT_NONE); + if ((state = EXEC_TAG()) == 0) { + result = (*func)(obj, arg, Qfalse); + } + POP_TAG(); + recursive_pop(); + if (state) JUMP_TAG(state); + return result; + } +} +/********************************************************************** + + file.c - + + $Author: nobu $ + $Date: 2005/04/18 15:01:19 $ + created at: Mon Nov 15 12:24:34 JST 1993 + + Copyright (C) 1993-2003 Yukihiro Matsumoto + Copyright (C) 2000 Network Applied Communication Laboratory, Inc. 
+ Copyright (C) 2000 Information-technology Promotion Agency, Japan + +**********************************************************************/ + +#ifdef _WIN32 +#include "missing/file.h" +#endif + +#include "ruby.h" +#include "rubyio.h" +#include "rubysig.h" +#include "util.h" +#include "dln.h" + +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif + +#ifdef HAVE_SYS_FILE_H +# include <sys/file.h> +#else +int flock _((int, int)); +#endif + +#ifdef HAVE_SYS_PARAM_H +# include <sys/param.h> +#endif +#ifndef MAXPATHLEN +# define MAXPATHLEN 1024 +#endif + +#include <time.h> + +VALUE rb_time_new _((time_t, time_t)); + +#ifdef HAVE_UTIME_H +#include <utime.h> +#elif defined HAVE_SYS_UTIME_H +#include <sys/utime.h> +#endif + +#ifdef HAVE_PWD_H +#include <pwd.h> +#endif + +#ifndef HAVE_STRING_H +char *strrchr _((const char*,const char)); +#endif + +#include <sys/types.h> +#include <sys/stat.h> + +#ifdef HAVE_SYS_MKDEV_H +#include <sys/mkdev.h> +#endif + +#if !defined HAVE_LSTAT && !defined lstat +#define lstat stat +#endif + +VALUE rb_cFile; +VALUE rb_mFileTest; +static VALUE rb_cStat; + +VALUE +rb_get_path(obj) + VALUE obj; +{ + VALUE tmp; + static ID to_path; + + rb_check_safe_obj(obj); + tmp = rb_check_string_type(obj); + if (!NIL_P(tmp)) goto exit; + + if (!to_path) { + to_path = rb_intern("to_path"); + } + if (rb_respond_to(obj, to_path)) { + obj = rb_funcall(obj, to_path, 0, 0); + } + tmp = rb_str_to_str(obj); + exit: + if (obj != tmp) { + rb_check_safe_obj(tmp); + } + return tmp; +} + +static long +apply2files(func, vargs, arg) + void (*func)(); + VALUE vargs; + void *arg; +{ + long i; + VALUE path; + struct RArray *args = RARRAY(vargs); + + rb_secure(4); + for (i=0; i<args->len; i++) { + path = rb_get_path(args->ptr[i]); + (*func)(StringValueCStr(path), arg); + } + + return args->len; +} + +/* + * call-seq: + * file.path -> filename + * + * Returns the pathname used to create <i>file</i> as a string. Does + * not normalize the name. 
+ * + * File.new("testfile").path #=> "testfile" + * File.new("/tmp/../tmp/xxx", "w").path #=> "/tmp/../tmp/xxx" + * + */ + +static VALUE +rb_file_path(obj) + VALUE obj; +{ + OpenFile *fptr; + + fptr = RFILE(rb_io_taint_check(obj))->fptr; + rb_io_check_initialized(fptr); + if (!fptr->path) return Qnil; + return rb_tainted_str_new2(fptr->path); +} + +static VALUE +stat_new_0(klass, st) + VALUE klass; + struct stat *st; +{ + struct stat *nst = 0; + + if (st) { + nst = ALLOC(struct stat); + *nst = *st; + } + return Data_Wrap_Struct(klass, NULL, free, nst); +} + +static VALUE +stat_new(st) + struct stat *st; +{ + return stat_new_0(rb_cStat, st); +} + +static struct stat* +get_stat(self) + VALUE self; +{ + struct stat* st; + Data_Get_Struct(self, struct stat, st); + if (!st) rb_raise(rb_eTypeError, "uninitialized File::Stat"); + return st; +} + +/* + * call-seq: + * stat <=> other_stat => -1, 0, 1 + * + * Compares <code>File::Stat</code> objects by comparing their + * respective modification times. + * + * f1 = File.new("f1", "w") + * sleep 1 + * f2 = File.new("f2", "w") + * f1.stat <=> f2.stat #=> -1 + */ + +static VALUE +rb_stat_cmp(self, other) + VALUE self, other; +{ + if (rb_obj_is_kind_of(other, rb_obj_class(self))) { + time_t t1 = get_stat(self)->st_mtime; + time_t t2 = get_stat(other)->st_mtime; + if (t1 == t2) + return INT2FIX(0); + else if (t1 < t2) + return INT2FIX(-1); + else + return INT2FIX(1); + } + return Qnil; +} + +/* + * call-seq: + * stat.dev => fixnum + * + * Returns an integer representing the device on which <i>stat</i> + * resides. + * + * File.stat("testfile").dev #=> 774 + */ + +static VALUE +rb_stat_dev(self) + VALUE self; +{ + return INT2NUM(get_stat(self)->st_dev); +} + +/* + * call-seq: + * stat.dev_major => fixnum + * + * Returns the major part of <code>File_Stat#dev</code> or + * <code>nil</code>. 
+ * + * File.stat("/dev/fd1").dev_major #=> 2 + * File.stat("/dev/tty").dev_major #=> 5 + */ + +static VALUE +rb_stat_dev_major(self) + VALUE self; +{ +#if defined(major) + long dev = get_stat(self)->st_dev; + return ULONG2NUM(major(dev)); +#else + return Qnil; +#endif +} + +/* + * call-seq: + * stat.dev_minor => fixnum + * + * Returns the minor part of <code>File_Stat#dev</code> or + * <code>nil</code>. + * + * File.stat("/dev/fd1").dev_minor #=> 1 + * File.stat("/dev/tty").dev_minor #=> 0 + */ + +static VALUE +rb_stat_dev_minor(self) + VALUE self; +{ +#if defined(minor) + long dev = get_stat(self)->st_dev; + return ULONG2NUM(minor(dev)); +#else + return Qnil; +#endif +} + + +/* + * call-seq: + * stat.ino => fixnum + * + * Returns the inode number for <i>stat</i>. + * + * File.stat("testfile").ino #=> 1083669 + * + */ + +static VALUE +rb_stat_ino(self) + VALUE self; +{ +#ifdef HUGE_ST_INO + return ULL2NUM(get_stat(self)->st_ino); +#else + return ULONG2NUM(get_stat(self)->st_ino); +#endif +} + +/* + * call-seq: + * stat.mode => fixnum + * + * Returns an integer representing the permission bits of + * <i>stat</i>. The meaning of the bits is platform dependent; on + * Unix systems, see <code>stat(2)</code>. + * + * File.chmod(0644, "testfile") #=> 1 + * s = File.stat("testfile") + * sprintf("%o", s.mode) #=> "100644" + */ + +static VALUE +rb_stat_mode(self) + VALUE self; +{ +#ifdef __BORLANDC__ + return UINT2NUM((unsigned short)(get_stat(self)->st_mode)); +#else + return UINT2NUM(get_stat(self)->st_mode); +#endif +} + +/* + * call-seq: + * stat.nlink => fixnum + * + * Returns the number of hard links to <i>stat</i>. + * + * File.stat("testfile").nlink #=> 1 + * File.link("testfile", "testfile.bak") #=> 0 + * File.stat("testfile").nlink #=> 2 + * + */ + +static VALUE +rb_stat_nlink(self) + VALUE self; +{ + return UINT2NUM(get_stat(self)->st_nlink); +} + + +/* + * call-seq: + * stat.uid => fixnum + * + * Returns the numeric user id of the owner of <i>stat</i>. 
+ * + * File.stat("testfile").uid #=> 501 + * + */ + +static VALUE +rb_stat_uid(self) + VALUE self; +{ + return UINT2NUM(get_stat(self)->st_uid); +} + +/* + * call-seq: + * stat.gid => fixnum + * + * Returns the numeric group id of the owner of <i>stat</i>. + * + * File.stat("testfile").gid #=> 500 + * + */ + +static VALUE +rb_stat_gid(self) + VALUE self; +{ + return UINT2NUM(get_stat(self)->st_gid); +} + + +/* + * call-seq: + * stat.rdev => fixnum or nil + * + * Returns an integer representing the device type on which + * <i>stat</i> resides. Returns <code>nil</code> if the operating + * system doesn't support this feature. + * + * File.stat("/dev/fd1").rdev #=> 513 + * File.stat("/dev/tty").rdev #=> 1280 + */ + +static VALUE +rb_stat_rdev(self) + VALUE self; +{ +#ifdef HAVE_ST_RDEV + return ULONG2NUM(get_stat(self)->st_rdev); +#else + return Qnil; +#endif +} + +/* + * call-seq: + * stat.rdev_major => fixnum + * + * Returns the major part of <code>File_Stat#rdev</code> or + * <code>nil</code>. + * + * File.stat("/dev/fd1").rdev_major #=> 2 + * File.stat("/dev/tty").rdev_major #=> 5 + */ + +static VALUE +rb_stat_rdev_major(self) + VALUE self; +{ +#if defined(HAVE_ST_RDEV) && defined(major) + long rdev = get_stat(self)->st_rdev; + return ULONG2NUM(major(rdev)); +#else + return Qnil; +#endif +} + +/* + * call-seq: + * stat.rdev_minor => fixnum + * + * Returns the minor part of <code>File_Stat#rdev</code> or + * <code>nil</code>. + * + * File.stat("/dev/fd1").rdev_minor #=> 1 + * File.stat("/dev/tty").rdev_minor #=> 0 + */ + +static VALUE +rb_stat_rdev_minor(self) + VALUE self; +{ +#if defined(HAVE_ST_RDEV) && defined(minor) + long rdev = get_stat(self)->st_rdev; + return ULONG2NUM(minor(rdev)); +#else + return Qnil; +#endif +} + +/* + * call-seq: + * stat.size => fixnum + * + * Returns the size of <i>stat</i> in bytes. 
+ * + * File.stat("testfile").size #=> 66 + */ + +static VALUE +rb_stat_size(self) + VALUE self; +{ + return OFFT2NUM(get_stat(self)->st_size); +} + +/* + * call-seq: + * stat.blksize => integer or nil + * + * Returns the native file system's block size. Will return <code>nil</code> + * on platforms that don't support this information. + * + * File.stat("testfile").blksize #=> 4096 + * + */ + +static VALUE +rb_stat_blksize(self) + VALUE self; +{ +#ifdef HAVE_ST_BLKSIZE + return ULONG2NUM(get_stat(self)->st_blksize); +#else + return Qnil; +#endif +} + +/* + * call-seq: + * stat.blocks => integer or nil + * + * Returns the number of native file system blocks allocated for this + * file, or <code>nil</code> if the operating system doesn't + * support this feature. + * + * File.stat("testfile").blocks #=> 2 + */ + +static VALUE +rb_stat_blocks(self) + VALUE self; +{ +#ifdef HAVE_ST_BLOCKS + return ULONG2NUM(get_stat(self)->st_blocks); +#else + return Qnil; +#endif +} + + +/* + * call-seq: + * stat.atime => time + * + * Returns the last access time for this file as an object of class + * <code>Time</code>. + * + * File.stat("testfile").atime #=> Wed Dec 31 18:00:00 CST 1969 + * + */ + +static VALUE +rb_stat_atime(self) + VALUE self; +{ + return rb_time_new(get_stat(self)->st_atime, 0); +} + +/* + * call-seq: + * stat.mtime -> aTime + * + * Returns the modification time of <i>stat</i>. + * + * File.stat("testfile").mtime #=> Wed Apr 09 08:53:14 CDT 2003 + * + */ + +static VALUE +rb_stat_mtime(self) + VALUE self; +{ + return rb_time_new(get_stat(self)->st_mtime, 0); +} + +/* + * call-seq: + * stat.ctime -> aTime + * + * Returns the change time for <i>stat</i> (that is, the time + * directory information about the file was changed, not the file + * itself). 
+ * + * File.stat("testfile").ctime #=> Wed Apr 09 08:53:14 CDT 2003 + * + */ + +static VALUE +rb_stat_ctime(self) + VALUE self; +{ + return rb_time_new(get_stat(self)->st_ctime, 0); +} + +/* + * call-seq: + * stat.inspect => string + * + * Produce a nicely formatted description of <i>stat</i>. + * + * File.stat("/etc/passwd").inspect + * #=> "#<File::Stat dev=0xe000005, ino=1078078, mode=0100644, + * nlink=1, uid=0, gid=0, rdev=0x0, size=1374, blksize=4096, + * blocks=8, atime=Wed Dec 10 10:16:12 CST 2003, + * mtime=Fri Sep 12 15:41:41 CDT 2003, + * ctime=Mon Oct 27 11:20:27 CST 2003>" + */ + +static VALUE +rb_stat_inspect(self) + VALUE self; +{ + VALUE str; + int i; + static struct { + char *name; + VALUE (*func)(); + } member[] = { + {"dev", rb_stat_dev}, + {"ino", rb_stat_ino}, + {"mode", rb_stat_mode}, + {"nlink", rb_stat_nlink}, + {"uid", rb_stat_uid}, + {"gid", rb_stat_gid}, + {"rdev", rb_stat_rdev}, + {"size", rb_stat_size}, + {"blksize", rb_stat_blksize}, + {"blocks", rb_stat_blocks}, + {"atime", rb_stat_atime}, + {"mtime", rb_stat_mtime}, + {"ctime", rb_stat_ctime}, + }; + + str = rb_str_buf_new2("#<"); + rb_str_buf_cat2(str, rb_obj_classname(self)); + rb_str_buf_cat2(str, " "); + + for (i = 0; i < sizeof(member)/sizeof(member[0]); i++) { + VALUE v; + + if (i > 0) { + rb_str_buf_cat2(str, ", "); + } + rb_str_buf_cat2(str, member[i].name); + rb_str_buf_cat2(str, "="); + v = (*member[i].func)(self); + if (i == 2) { /* mode */ + char buf[32]; + + sprintf(buf, "0%lo", NUM2ULONG(v)); + rb_str_buf_cat2(str, buf); + } + else if (i == 0 || i == 6) { /* dev/rdev */ + char buf[32]; + + sprintf(buf, "0x%lx", NUM2ULONG(v)); + rb_str_buf_cat2(str, buf); + } + else { + rb_str_append(str, rb_inspect(v)); + } + } + rb_str_buf_cat2(str, ">"); + OBJ_INFECT(str, self); + + return str; +} + +static int +rb_stat(file, st) + VALUE file; + struct stat *st; +{ + VALUE tmp; + + rb_secure(2); + tmp = rb_check_convert_type(file, T_FILE, "IO", "to_io"); + if (!NIL_P(tmp)) { + 
OpenFile *fptr; + + GetOpenFile(tmp, fptr); + return fstat(fptr->fd, st); + } + FilePathValue(file); + return stat(StringValueCStr(file), st); +} + +/* + * call-seq: + * File.stat(file_name) => stat + * + * Returns a <code>File::Stat</code> object for the named file (see + * <code>File::Stat</code>). + * + * File.stat("testfile").mtime #=> Tue Apr 08 12:58:04 CDT 2003 + * + */ + +static VALUE +rb_file_s_stat(klass, fname) + VALUE klass, fname; +{ + struct stat st; + + rb_secure(4); + FilePathValue(fname); + if (rb_stat(fname, &st) < 0) { + rb_sys_fail(StringValueCStr(fname)); + } + return stat_new(&st); +} + +/* + * call-seq: + * ios.stat => stat + * + * Returns status information for <em>ios</em> as an object of type + * <code>File::Stat</code>. + * + * f = File.new("testfile") + * s = f.stat + * "%o" % s.mode #=> "100644" + * s.blksize #=> 4096 + * s.atime #=> Wed Apr 09 08:53:54 CDT 2003 + * + */ + +static VALUE +rb_io_stat(obj) + VALUE obj; +{ + OpenFile *fptr; + struct stat st; + + GetOpenFile(obj, fptr); + if (fstat(fptr->fd, &st) == -1) { + rb_sys_fail(fptr->path); + } + return stat_new(&st); +} + +/* + * call-seq: + * File.lstat(file_name) => stat + * + * Same as <code>File::stat</code>, but does not follow the last symbolic + * link. Instead, reports on the link itself. + * + * File.symlink("testfile", "link2test") #=> 0 + * File.stat("testfile").size #=> 66 + * File.lstat("link2test").size #=> 8 + * File.stat("link2test").size #=> 66 + * + */ + +static VALUE +rb_file_s_lstat(klass, fname) + VALUE klass, fname; +{ +#ifdef HAVE_LSTAT + struct stat st; + + rb_secure(2); + FilePathValue(fname); + if (lstat(StringValueCStr(fname), &st) == -1) { + rb_sys_fail(RSTRING(fname)->ptr); + } + return stat_new(&st); +#else + return rb_file_s_stat(klass, fname); +#endif +} + + +/* + * call-seq: + * file.lstat => stat + * + * Same as <code>IO#stat</code>, but does not follow the last symbolic + * link. Instead, reports on the link itself. 
+ * + * File.symlink("testfile", "link2test") #=> 0 + * File.stat("testfile").size #=> 66 + * f = File.new("link2test") + * f.lstat.size #=> 8 + * f.stat.size #=> 66 + */ + +static VALUE +rb_file_lstat(obj) + VALUE obj; +{ +#ifdef HAVE_LSTAT + OpenFile *fptr; + struct stat st; + + rb_secure(2); + GetOpenFile(obj, fptr); + if (!fptr->path) return Qnil; + if (lstat(fptr->path, &st) == -1) { + rb_sys_fail(fptr->path); + } + return stat_new(&st); +#else + return rb_io_stat(obj); +#endif +} + +static int +group_member(gid) + GETGROUPS_T gid; +{ +#ifndef _WIN32 + if (getgid() == gid) + return Qtrue; + +# ifdef HAVE_GETGROUPS +# ifndef NGROUPS +# ifdef NGROUPS_MAX +# define NGROUPS NGROUPS_MAX +# else +# define NGROUPS 32 +# endif +# endif + { + GETGROUPS_T gary[NGROUPS]; + int anum; + + anum = getgroups(NGROUPS, gary); + while (--anum >= 0) + if (gary[anum] == gid) + return Qtrue; + } +# endif +#endif + return Qfalse; +} + +#ifndef S_IXUGO +# define S_IXUGO (S_IXUSR | S_IXGRP | S_IXOTH) +#endif + +int +eaccess(path, mode) + const char *path; + int mode; +{ +#if defined(S_IXGRP) && !defined(_WIN32) && !defined(__CYGWIN__) + struct stat st; + int euid; + + if (stat(path, &st) < 0) return -1; + + euid = geteuid(); + + if (euid == 0) { + /* Root can read or write any file. */ + if (!(mode & X_OK)) + return 0; + + /* Root can execute any file that has any one of the execute + bits set. */ + if (st.st_mode & S_IXUGO) + return 0; + + return -1; + } + + if (st.st_uid == euid) /* owner */ + mode <<= 6; + else if (getegid() == st.st_gid || group_member(st.st_gid)) + mode <<= 3; + + if ((st.st_mode & mode) == mode) return 0; + + return -1; +#else +# if _MSC_VER >= 1400 + mode &= 6; +# endif + return access(path, mode); +#endif +} + + +/* + * Document-class: FileTest + * + * <code>FileTest</code> implements file test operations similar to + * those used in <code>File::Stat</code>. 
It exists as a standalone + * module, and its methods are also insinuated into the <code>File</code> + * class. (Note that this is not done by inclusion: the interpreter cheats). + * + */ + + +/* + * call-seq: + * File.directory?(file_name) => true or false + * + * Returns <code>true</code> if the named file is a directory, + * <code>false</code> otherwise. + * + * File.directory?(".") + */ + +static VALUE +test_d(obj, fname) + VALUE obj, fname; +{ +#ifndef S_ISDIR +# define S_ISDIR(m) ((m & S_IFMT) == S_IFDIR) +#endif + + struct stat st; + + if (rb_stat(fname, &st) < 0) return Qfalse; + if (S_ISDIR(st.st_mode)) return Qtrue; + return Qfalse; +} + +/* + * call-seq: + * File.pipe?(file_name) => true or false + * + * Returns <code>true</code> if the named file is a pipe. + */ + +static VALUE +test_p(obj, fname) + VALUE obj, fname; +{ +#ifdef S_IFIFO +# ifndef S_ISFIFO +# define S_ISFIFO(m) ((m & S_IFMT) == S_IFIFO) +# endif + + struct stat st; + + if (rb_stat(fname, &st) < 0) return Qfalse; + if (S_ISFIFO(st.st_mode)) return Qtrue; + +#endif + return Qfalse; +} + +/* + * call-seq: + * File.symlink?(file_name) => true or false + * + * Returns <code>true</code> if the named file is a symbolic link. + */ + +static VALUE +test_l(obj, fname) + VALUE obj, fname; +{ +#ifndef S_ISLNK +# ifdef _S_ISLNK +# define S_ISLNK(m) _S_ISLNK(m) +# else +# ifdef _S_IFLNK +# define S_ISLNK(m) ((m & S_IFMT) == _S_IFLNK) +# else +# ifdef S_IFLNK +# define S_ISLNK(m) ((m & S_IFMT) == S_IFLNK) +# endif +# endif +# endif +#endif + +#ifdef S_ISLNK + struct stat st; + + rb_secure(2); + FilePathValue(fname); + if (lstat(StringValueCStr(fname), &st) < 0) return Qfalse; + if (S_ISLNK(st.st_mode)) return Qtrue; +#endif + + return Qfalse; +} + +/* + * call-seq: + * File.socket?(file_name) => true or false + * + * Returns <code>true</code> if the named file is a socket. 
+ */ + +static VALUE +test_S(obj, fname) + VALUE obj, fname; +{ +#ifndef S_ISSOCK +# ifdef _S_ISSOCK +# define S_ISSOCK(m) _S_ISSOCK(m) +# else +# ifdef _S_IFSOCK +# define S_ISSOCK(m) ((m & S_IFMT) == _S_IFSOCK) +# else +# ifdef S_IFSOCK +# define S_ISSOCK(m) ((m & S_IFMT) == S_IFSOCK) +# endif +# endif +# endif +#endif + +#ifdef S_ISSOCK + struct stat st; + + if (rb_stat(fname, &st) < 0) return Qfalse; + if (S_ISSOCK(st.st_mode)) return Qtrue; + +#endif + return Qfalse; +} + +/* + * call-seq: + * File.blockdev?(file_name) => true or false + * + * Returns <code>true</code> if the named file is a block device. + */ + +static VALUE +test_b(obj, fname) + VALUE obj, fname; +{ +#ifndef S_ISBLK +# ifdef S_IFBLK +# define S_ISBLK(m) ((m & S_IFMT) == S_IFBLK) +# else +# define S_ISBLK(m) (0) /* anytime false */ +# endif +#endif + +#ifdef S_ISBLK + struct stat st; + + if (rb_stat(fname, &st) < 0) return Qfalse; + if (S_ISBLK(st.st_mode)) return Qtrue; + +#endif + return Qfalse; +} + +/* + * call-seq: + * File.chardev?(file_name) => true or false + * + * Returns <code>true</code> if the named file is a character device. + */ +static VALUE +test_c(obj, fname) + VALUE obj, fname; +{ +#ifndef S_ISCHR +# define S_ISCHR(m) ((m & S_IFMT) == S_IFCHR) +#endif + + struct stat st; + + if (rb_stat(fname, &st) < 0) return Qfalse; + if (S_ISCHR(st.st_mode)) return Qtrue; + + return Qfalse; +} + + +/* + * call-seq: + * File.exist?(file_name) => true or false + * File.exists?(file_name) => true or false (obsolete) + * + * Return <code>true</code> if the named file exists. + */ + +static VALUE +test_e(obj, fname) + VALUE obj, fname; +{ + struct stat st; + + if (rb_stat(fname, &st) < 0) return Qfalse; + return Qtrue; +} + +/* + * call-seq: + * File.readable?(file_name) => true or false + * + * Returns <code>true</code> if the named file is readable by the effective + * user id of this process. 
+ */ + +static VALUE +test_r(obj, fname) + VALUE obj, fname; +{ + rb_secure(2); + FilePathValue(fname); + if (eaccess(StringValueCStr(fname), R_OK) < 0) return Qfalse; + return Qtrue; +} + +/* + * call-seq: + * File.readable_real?(file_name) => true or false + * + * Returns <code>true</code> if the named file is readable by the real + * user id of this process. + */ + +static VALUE +test_R(obj, fname) + VALUE obj, fname; +{ + rb_secure(2); + FilePathValue(fname); + if (access(StringValueCStr(fname), R_OK) < 0) return Qfalse; + return Qtrue; +} + +#ifndef S_IRUGO +# define S_IRUGO (S_IRUSR | S_IRGRP | S_IROTH) +#endif + +#ifndef S_IWUGO +# define S_IWUGO (S_IWUSR | S_IWGRP | S_IWOTH) +#endif + +/* + * call-seq: + * File.world_readable?(file_name) => fixnum or nil + * + * If <i>file_name</i> is readable by others, returns an integer + * representing the file permission bits of <i>file_name</i>. Returns + * <code>nil</code> otherwise. The meaning of the bits is platform + * dependent; on Unix systems, see <code>stat(2)</code>. + * + * File.world_readable?("/etc/passwd") # => 420 + * m = File.world_readable?("/etc/passwd") + * sprintf("%o", m) # => "644" + */ + +static VALUE +test_wr(obj, fname) + VALUE obj, fname; +{ +#ifdef S_IROTH + struct stat st; + + if (rb_stat(fname, &st) < 0) return Qnil; + if ((st.st_mode & (S_IROTH)) == S_IROTH) { + return UINT2NUM(st.st_mode & (S_IRUGO|S_IWUGO|S_IXUGO)); + } +#endif + return Qnil; +} + +/* + * call-seq: + * File.writable?(file_name) => true or false + * + * Returns <code>true</code> if the named file is writable by the effective + * user id of this process. + */ + +static VALUE +test_w(obj, fname) + VALUE obj, fname; +{ + rb_secure(2); + FilePathValue(fname); + if (eaccess(StringValueCStr(fname), W_OK) < 0) return Qfalse; + return Qtrue; +} + +/* + * call-seq: + * File.writable_real?(file_name) => true or false + * + * Returns <code>true</code> if the named file is writable by the real + * user id of this process. 
+ */ + +static VALUE +test_W(obj, fname) + VALUE obj, fname; +{ + rb_secure(2); + FilePathValue(fname); + if (access(StringValueCStr(fname), W_OK) < 0) return Qfalse; + return Qtrue; +} + +/* + * call-seq: + * File.world_writable?(file_name) => fixnum or nil + * + * If <i>file_name</i> is writable by others, returns an integer + * representing the file permission bits of <i>file_name</i>. Returns + * <code>nil</code> otherwise. The meaning of the bits is platform + * dependent; on Unix systems, see <code>stat(2)</code>. + * + * File.world_writable?("/tmp") #=> 511 + * m = File.world_writable?("/tmp") + * sprintf("%o", m) #=> "777" + */ + +static VALUE +test_ww(obj, fname) + VALUE obj, fname; +{ +#ifdef S_IWOTH + struct stat st; + + if (rb_stat(fname, &st) < 0) return Qnil; + if ((st.st_mode & (S_IWOTH)) == S_IWOTH) { + return UINT2NUM(st.st_mode & (S_IRUGO|S_IWUGO|S_IXUGO)); + } +#endif + return Qnil; +} + +/* + * call-seq: + * File.executable?(file_name) => true or false + * + * Returns <code>true</code> if the named file is executable by the effective + * user id of this process. + */ + +static VALUE +test_x(obj, fname) + VALUE obj, fname; +{ + rb_secure(2); + FilePathValue(fname); + if (eaccess(StringValueCStr(fname), X_OK) < 0) return Qfalse; + return Qtrue; +} + +/* + * call-seq: + * File.executable_real?(file_name) => true or false + * + * Returns <code>true</code> if the named file is executable by the real + * user id of this process. + */ + +static VALUE +test_X(obj, fname) + VALUE obj, fname; +{ + rb_secure(2); + FilePathValue(fname); + if (access(StringValueCStr(fname), X_OK) < 0) return Qfalse; + return Qtrue; +} + +#ifndef S_ISREG +# define S_ISREG(m) ((m & S_IFMT) == S_IFREG) +#endif + +/* + * call-seq: + * File.file?(file_name) => true or false + * + * Returns <code>true</code> if the named file exists and is a + * regular file. 
+ */ + +static VALUE +test_f(obj, fname) + VALUE obj, fname; +{ + struct stat st; + + if (rb_stat(fname, &st) < 0) return Qfalse; + if (S_ISREG(st.st_mode)) return Qtrue; + return Qfalse; +} + +/* + * call-seq: + * File.zero?(file_name) => true or false + * + * Returns <code>true</code> if the named file exists and has + * a zero size. + */ + +static VALUE +test_z(obj, fname) + VALUE obj, fname; +{ + struct stat st; + + if (rb_stat(fname, &st) < 0) return Qfalse; + if (st.st_size == 0) return Qtrue; + return Qfalse; +} + +/* + * call-seq: + * File.size?(file_name) => integer or nil + * + * Returns <code>nil</code> if <code>file_name</code> doesn't + * exist or has zero size, the size of the file otherwise. + */ + +static VALUE +test_s(obj, fname) + VALUE obj, fname; +{ + struct stat st; + + if (rb_stat(fname, &st) < 0) return Qnil; + if (st.st_size == 0) return Qnil; + return OFFT2NUM(st.st_size); +} + +/* + * call-seq: + * File.owned?(file_name) => true or false + * + * Returns <code>true</code> if the named file exists and the + * effective user id of the calling process is the owner of + * the file. + */ + +static VALUE +test_owned(obj, fname) + VALUE obj, fname; +{ + struct stat st; + + if (rb_stat(fname, &st) < 0) return Qfalse; + if (st.st_uid == geteuid()) return Qtrue; + return Qfalse; +} + +/* + * Returns <code>true</code> if the named file exists and the + * real user id of the calling process is the owner of + * the file. + */ + +static VALUE +test_rowned(obj, fname) + VALUE obj, fname; +{ + struct stat st; + + if (rb_stat(fname, &st) < 0) return Qfalse; + if (st.st_uid == getuid()) return Qtrue; + return Qfalse; +} + +/* + * call-seq: + * File.grpowned?(file_name) => true or false + * + * Returns <code>true</code> if the named file exists and the + * effective group id of the calling process is the owner of + * the file. Returns <code>false</code> on Windows. 
+ */ + +static VALUE +test_grpowned(obj, fname) + VALUE obj, fname; +{ +#ifndef _WIN32 + struct stat st; + + if (rb_stat(fname, &st) < 0) return Qfalse; + if (st.st_gid == getegid()) return Qtrue; +#endif + return Qfalse; +} + +/* + * Shared helper for the setuid/setgid/sticky tests: stats fname and + * returns Qtrue when st_mode has any of the bits in mode set, Qfalse + * when it has none of them or when stat fails. + */ +#if defined(S_ISUID) || defined(S_ISGID) || defined(S_ISVTX) +static VALUE +check3rdbyte(fname, mode) + VALUE fname; + int mode; +{ + struct stat st; + + rb_secure(2); + FilePathValue(fname); + if (stat(StringValueCStr(fname), &st) < 0) return Qfalse; + if (st.st_mode & mode) return Qtrue; + return Qfalse; +} +#endif + +/* + * call-seq: + * File.setuid?(file_name) => true or false + * + * Returns <code>true</code> if the named file has the setuid bit set. + */ + +static VALUE +test_suid(obj, fname) + VALUE obj, fname; +{ +#ifdef S_ISUID + return check3rdbyte(fname, S_ISUID); +#else + return Qfalse; +#endif +} + +/* + * call-seq: + * File.setgid?(file_name) => true or false + * + * Returns <code>true</code> if the named file has the setgid bit set. + */ + +static VALUE +test_sgid(obj, fname) + VALUE obj, fname; +{ +#ifdef S_ISGID + return check3rdbyte(fname, S_ISGID); +#else + return Qfalse; +#endif +} + +/* + * call-seq: + * File.sticky?(file_name) => true or false + * + * Returns <code>true</code> if the named file has the sticky bit set. + * Returns <code>nil</code> on platforms that do not define S_ISVTX. + */ + +static VALUE +test_sticky(obj, fname) + VALUE obj, fname; +{ +#ifdef S_ISVTX + return check3rdbyte(fname, S_ISVTX); +#else + return Qnil; +#endif +} + +/* + * call-seq: + * File.size(file_name) => integer + * + * Returns the size of <code>file_name</code>. 
+ */ + +static VALUE +rb_file_s_size(klass, fname) + VALUE klass, fname; +{ + struct stat st; + + if (rb_stat(fname, &st) < 0) + rb_sys_fail(StringValueCStr(fname)); + return OFFT2NUM(st.st_size); +} + +static VALUE +rb_file_ftype(st) + struct stat *st; +{ + char *t; + + if (S_ISREG(st->st_mode)) { + t = "file"; + } + else if (S_ISDIR(st->st_mode)) { + t = "directory"; + } + else if (S_ISCHR(st->st_mode)) { + t = "characterSpecial"; + } +#ifdef S_ISBLK + else if (S_ISBLK(st->st_mode)) { + t = "blockSpecial"; + } +#endif +#ifdef S_ISFIFO + else if (S_ISFIFO(st->st_mode)) { + t = "fifo"; + } +#endif +#ifdef S_ISLNK + else if (S_ISLNK(st->st_mode)) { + t = "link"; + } +#endif +#ifdef S_ISSOCK + else if (S_ISSOCK(st->st_mode)) { + t = "socket"; + } +#endif + else { + t = "unknown"; + } + + return rb_str_new2(t); +} + +/* + * call-seq: + * File.ftype(file_name) => string + * + * Identifies the type of the named file; the return string is one of + * ``<code>file</code>'', ``<code>directory</code>'', + * ``<code>characterSpecial</code>'', ``<code>blockSpecial</code>'', + * ``<code>fifo</code>'', ``<code>link</code>'', + * ``<code>socket</code>'', or ``<code>unknown</code>''. + * + * File.ftype("testfile") #=> "file" + * File.ftype("/dev/tty") #=> "characterSpecial" + * File.ftype("/tmp/.X11-unix/X0") #=> "socket" + */ + +static VALUE +rb_file_s_ftype(klass, fname) + VALUE klass, fname; +{ + struct stat st; + + rb_secure(2); + FilePathValue(fname); + if (lstat(StringValueCStr(fname), &st) == -1) { + rb_sys_fail(RSTRING(fname)->ptr); + } + + return rb_file_ftype(&st); +} + +/* + * call-seq: + * File.atime(file_name) => time + * + * Returns the last access time for the named file as a Time object). 
+ * + * File.atime("testfile") #=> Wed Apr 09 08:51:48 CDT 2003 + * + */ + +static VALUE +rb_file_s_atime(klass, fname) + VALUE klass, fname; +{ + struct stat st; + + if (rb_stat(fname, &st) < 0) + rb_sys_fail(StringValueCStr(fname)); + return rb_time_new(st.st_atime, 0); +} + +/* + * call-seq: + * file.atime => time + * + * Returns the last access time (a <code>Time</code> object) + * for <i>file</i>, or epoch if <i>file</i> has not been accessed. + * + * File.new("testfile").atime #=> Wed Dec 31 18:00:00 CST 1969 + * + */ + +static VALUE +rb_file_atime(obj) + VALUE obj; +{ + OpenFile *fptr; + struct stat st; + + GetOpenFile(obj, fptr); + if (fstat(fptr->fd, &st) == -1) { + rb_sys_fail(fptr->path); + } + return rb_time_new(st.st_atime, 0); +} + +/* + * call-seq: + * File.mtime(file_name) => time + * + * Returns the modification time for the named file as a Time object. + * + * File.mtime("testfile") #=> Tue Apr 08 12:58:04 CDT 2003 + * + */ + +static VALUE +rb_file_s_mtime(klass, fname) + VALUE klass, fname; +{ + struct stat st; + + if (rb_stat(fname, &st) < 0) + rb_sys_fail(RSTRING(fname)->ptr); + return rb_time_new(st.st_mtime, 0); +} + +/* + * call-seq: + * file.mtime -> time + * + * Returns the modification time for <i>file</i>. + * + * File.new("testfile").mtime #=> Wed Apr 09 08:53:14 CDT 2003 + * + */ + +static VALUE +rb_file_mtime(obj) + VALUE obj; +{ + OpenFile *fptr; + struct stat st; + + GetOpenFile(obj, fptr); + if (fstat(fptr->fd, &st) == -1) { + rb_sys_fail(fptr->path); + } + return rb_time_new(st.st_mtime, 0); +} + +/* + * call-seq: + * File.ctime(file_name) => time + * + * Returns the change time for the named file (the time at which + * directory information about the file was changed, not the file + * itself). 
+ * + * File.ctime("testfile") #=> Wed Apr 09 08:53:13 CDT 2003 + * + */ + +static VALUE +rb_file_s_ctime(klass, fname) + VALUE klass, fname; +{ + struct stat st; + + if (rb_stat(fname, &st) < 0) + rb_sys_fail(RSTRING(fname)->ptr); + return rb_time_new(st.st_ctime, 0); +} + +/* + * call-seq: + * file.ctime -> time + * + * Returns the change time for <i>file</i> (that is, the time directory + * information about the file was changed, not the file itself). + * + * File.new("testfile").ctime #=> Wed Apr 09 08:53:14 CDT 2003 + * + */ + +static VALUE +rb_file_ctime(obj) + VALUE obj; +{ + OpenFile *fptr; + struct stat st; + + GetOpenFile(obj, fptr); + if (fstat(fptr->fd, &st) == -1) { + rb_sys_fail(fptr->path); + } + return rb_time_new(st.st_ctime, 0); +} + +static void +chmod_internal(path, mode) + const char *path; + int mode; +{ + if (chmod(path, mode) < 0) + rb_sys_fail(path); +} + +/* + * call-seq: + * File.chmod(mode_int, file_name, ... ) -> integer + * + * Changes permission bits on the named file(s) to the bit pattern + * represented by <i>mode_int</i>. Actual effects are operating system + * dependent (see the beginning of this section). On Unix systems, see + * <code>chmod(2)</code> for details. Returns the number of files + * processed. + * + * File.chmod(0644, "testfile", "out") #=> 2 + */ + +static VALUE +rb_file_s_chmod(argc, argv) + int argc; + VALUE *argv; +{ + VALUE vmode; + VALUE rest; + int mode; + long n; + + rb_secure(2); + rb_scan_args(argc, argv, "1*", &vmode, &rest); + mode = NUM2INT(vmode); + + n = apply2files(chmod_internal, rest, (void *)(long)mode); + return LONG2FIX(n); +} + +/* + * call-seq: + * file.chmod(mode_int) => 0 + * + * Changes permission bits on <i>file</i> to the bit pattern + * represented by <i>mode_int</i>. Actual effects are platform + * dependent; on Unix systems, see <code>chmod(2)</code> for details. + * Follows symbolic links. Also see <code>File#lchmod</code>. 
+ * + * f = File.new("out", "w"); + * f.chmod(0644) #=> 0 + */ + +static VALUE +rb_file_chmod(obj, vmode) + VALUE obj, vmode; +{ + OpenFile *fptr; + int mode; + + rb_secure(2); + mode = NUM2INT(vmode); + + GetOpenFile(obj, fptr); +#ifdef HAVE_FCHMOD + if (fchmod(fptr->fd, mode) == -1) + rb_sys_fail(fptr->path); +#else + if (!fptr->path) return Qnil; + if (chmod(fptr->path, mode) == -1) + rb_sys_fail(fptr->path); +#endif + + return INT2FIX(0); +} + +#if defined(HAVE_LCHMOD) +static void +lchmod_internal(path, mode) + const char *path; + int mode; +{ + if (lchmod(path, mode) < 0) + rb_sys_fail(path); +} + +/* + * call-seq: + * File.lchmod(mode_int, file_name, ...) => integer + * + * Equivalent to <code>File::chmod</code>, but does not follow symbolic + * links (so it will change the permissions associated with the link, + * not the file referenced by the link). Often not available. + * + */ + +static VALUE +rb_file_s_lchmod(argc, argv) + int argc; + VALUE *argv; +{ + VALUE vmode; + VALUE rest; + long mode, n; + + rb_secure(2); + rb_scan_args(argc, argv, "1*", &vmode, &rest); + mode = NUM2INT(vmode); + + n = apply2files(lchmod_internal, rest, (void *)(long)mode); + return LONG2FIX(n); +} +#else +static VALUE +rb_file_s_lchmod(argc, argv) + int argc; + VALUE *argv; +{ + rb_notimplement(); + return Qnil; /* not reached */ +} +#endif + +struct chown_args { + int owner, group; +}; + +static void +chown_internal(path, args) + const char *path; + struct chown_args *args; +{ + if (chown(path, args->owner, args->group) < 0) + rb_sys_fail(path); +} + +/* + * call-seq: + * File.chown(owner_int, group_int, file_name,... ) -> integer + * + * Changes the owner and group of the named file(s) to the given + * numeric owner and group id's. Only a process with superuser + * privileges may change the owner of a file. The current owner of a + * file may change the file's group to any group to which the owner + * belongs. A <code>nil</code> or -1 owner or group id is ignored. 
+ * Returns the number of files processed. + * + * File.chown(nil, 100, "testfile") + * + */ + +static VALUE +rb_file_s_chown(argc, argv) + int argc; + VALUE *argv; +{ + VALUE o, g, rest; + struct chown_args arg; + long n; + + rb_secure(2); + rb_scan_args(argc, argv, "2*", &o, &g, &rest); + if (NIL_P(o)) { + arg.owner = -1; + } + else { + arg.owner = NUM2INT(o); + } + if (NIL_P(g)) { + arg.group = -1; + } + else { + arg.group = NUM2INT(g); + } + + n = apply2files(chown_internal, rest, &arg); + return LONG2FIX(n); +} + +/* + * call-seq: + * file.chown(owner_int, group_int ) => 0 + * + * Changes the owner and group of <i>file</i> to the given numeric + * owner and group id's. Only a process with superuser privileges may + * change the owner of a file. The current owner of a file may change + * the file's group to any group to which the owner belongs. A + * <code>nil</code> or -1 owner or group id is ignored. Follows + * symbolic links. See also <code>File#lchown</code>. + * + * File.new("testfile").chown(502, 1000) + * + */ + +static VALUE +rb_file_chown(obj, owner, group) + VALUE obj, owner, group; +{ + OpenFile *fptr; + int o, g; + + rb_secure(2); + o = NUM2INT(owner); + g = NUM2INT(group); + GetOpenFile(obj, fptr); +#if defined(DJGPP) || defined(__CYGWIN32__) || defined(_WIN32) || defined(__EMX__) + if (!fptr->path) return Qnil; + if (chown(fptr->path, o, g) == -1) + rb_sys_fail(fptr->path); +#else + if (fchown(fptr->fd, o, g) == -1) + rb_sys_fail(fptr->path); +#endif + + return INT2FIX(0); +} + +#if defined(HAVE_LCHOWN) && !defined(__CHECKER__) +static void +lchown_internal(path, args) + const char *path; + struct chown_args *args; +{ + if (lchown(path, args->owner, args->group) < 0) + rb_sys_fail(path); +} + + +/* + * call-seq: + * file.lchown(owner_int, group_int, file_name,..) 
=> integer + * + * Equivalent to <code>File::chown</code>, but does not follow symbolic + * links (so it will change the owner associated with the link, not the + * file referenced by the link). Often not available. Returns number + * of files in the argument list. + * + */ + +static VALUE +rb_file_s_lchown(argc, argv) + int argc; + VALUE *argv; +{ + VALUE o, g, rest; + struct chown_args arg; + long n; + + rb_secure(2); + rb_scan_args(argc, argv, "2*", &o, &g, &rest); + if (NIL_P(o)) { + arg.owner = -1; + } + else { + arg.owner = NUM2INT(o); + } + if (NIL_P(g)) { + arg.group = -1; + } + else { + arg.group = NUM2INT(g); + } + + n = apply2files(lchown_internal, rest, &arg); + return LONG2FIX(n); +} +#else +static VALUE +rb_file_s_lchown(argc, argv) + int argc; + VALUE *argv; +{ + rb_notimplement(); +} +#endif + +struct timeval rb_time_timeval(); + +#if defined(HAVE_UTIMES) && !defined(__CHECKER__) + +static void +utime_internal(path, tvp) + char *path; + struct timeval tvp[]; +{ + if (utimes(path, tvp) < 0) + rb_sys_fail(path); +} + +/* + * call-seq: + * File.utime(atime, mtime, file_name,...) => integer + * + * Sets the access and modification times of each + * named file to the first two arguments. Returns + * the number of file names in the argument list. 
+ */ + +static VALUE +rb_file_s_utime(argc, argv) + int argc; + VALUE *argv; +{ + VALUE atime, mtime, rest; + struct timeval tvp[2]; + long n; + + rb_scan_args(argc, argv, "2*", &atime, &mtime, &rest); + + tvp[0] = rb_time_timeval(atime); + tvp[1] = rb_time_timeval(mtime); + + n = apply2files(utime_internal, rest, tvp); + return LONG2FIX(n); +} + +#else + +#if !defined HAVE_UTIME_H && !defined HAVE_SYS_UTIME_H +struct utimbuf { + long actime; + long modtime; +}; +#endif + +static void +utime_internal(path, utp) + const char *path; + struct utimbuf *utp; +{ + if (utime(path, utp) < 0) + rb_sys_fail(path); +} + +static VALUE +rb_file_s_utime(argc, argv) + int argc; + VALUE *argv; +{ + VALUE atime, mtime, rest; + long n; + struct timeval tv; + struct utimbuf utbuf; + + rb_scan_args(argc, argv, "2*", &atime, &mtime, &rest); + + tv = rb_time_timeval(atime); + utbuf.actime = tv.tv_sec; + tv = rb_time_timeval(mtime); + utbuf.modtime = tv.tv_sec; + + n = apply2files(utime_internal, rest, &utbuf); + return LONG2FIX(n); +} + +#endif + +NORETURN(static void sys_fail2 _((VALUE,VALUE))); +static void +sys_fail2(s1, s2) + VALUE s1, s2; +{ + char *buf; + int len; + + len = RSTRING(s1)->len + RSTRING(s2)->len + 5; + buf = ALLOCA_N(char, len); + snprintf(buf, len, "%s or %s", RSTRING(s1)->ptr, RSTRING(s2)->ptr); + rb_sys_fail(buf); +} + +/* + * call-seq: + * File.link(old_name, new_name) => 0 + * + * Creates a new name for an existing file using a hard link. Will not + * overwrite <i>new_name</i> if it already exists (raising a subclass + * of <code>SystemCallError</code>). Not available on all platforms. 
+ * + * File.link("testfile", ".testfile") #=> 0 + * IO.readlines(".testfile")[0] #=> "This is line one\n" + */ + +static VALUE +rb_file_s_link(klass, from, to) + VALUE klass, from, to; +{ +#ifdef HAVE_LINK + rb_secure(2); + FilePathValue(from); + FilePathValue(to); + + if (link(StringValueCStr(from), StringValueCStr(to)) < 0) { + sys_fail2(from, to); + } + return INT2FIX(0); +#else + rb_notimplement(); + return Qnil; /* not reached */ +#endif +} + +/* + * call-seq: + * File.symlink(old_name, new_name) => 0 + * + * Creates a symbolic link called <i>new_name</i> for the existing file + * <i>old_name</i>. Raises a <code>NotImplemented</code> exception on + * platforms that do not support symbolic links. + * + * File.symlink("testfile", "link2test") #=> 0 + * + */ + +static VALUE +rb_file_s_symlink(klass, from, to) + VALUE klass, from, to; +{ +#ifdef HAVE_SYMLINK + rb_secure(2); + FilePathValue(from); + FilePathValue(to); + + if (symlink(StringValueCStr(from), StringValueCStr(to)) < 0) { + sys_fail2(from, to); + } + return INT2FIX(0); +#else + rb_notimplement(); + return Qnil; /* not reached */ +#endif +} + +/* + * call-seq: + * File.readlink(link_name) -> file_name + * + * Returns the name of the file referenced by the given link. + * Not available on all platforms. 
+ * + * File.symlink("testfile", "link2test") #=> 0 + * File.readlink("link2test") #=> "testfile" + */ + +static VALUE +rb_file_s_readlink(klass, path) + VALUE klass, path; +{ +#ifdef HAVE_READLINK + char *buf; + int size = 100; + int rv; + VALUE v; + + rb_secure(2); + FilePathValue(path); + buf = xmalloc(size); + while ((rv = readlink(StringValueCStr(path), buf, size)) == size) { + size *= 2; + buf = xrealloc(buf, size); + } + if (rv < 0) { + free(buf); + rb_sys_fail(RSTRING(path)->ptr); + } + v = rb_tainted_str_new(buf, rv); + free(buf); + + return v; +#else + rb_notimplement(); + return Qnil; /* not reached */ +#endif +} + +static void +unlink_internal(path) + const char *path; +{ + if (unlink(path) < 0) + rb_sys_fail(path); +} + +/* + * call-seq: + * File.delete(file_name, ...) => integer + * File.unlink(file_name, ...) => integer + * + * Deletes the named files, returning the number of names + * passed as arguments. Raises an exception on any error. + * See also <code>Dir::rmdir</code>. + */ + +static VALUE +rb_file_s_unlink(klass, args) + VALUE klass, args; +{ + long n; + + rb_secure(2); + n = apply2files(unlink_internal, args, 0); + return LONG2FIX(n); +} + +/* + * call-seq: + * File.rename(old_name, new_name) => 0 + * + * Renames the given file to the new name. Raises a + * <code>SystemCallError</code> if the file cannot be renamed. 
+ * + * File.rename("afile", "afile.bak") #=> 0 + */ + +static VALUE +rb_file_s_rename(klass, from, to) + VALUE klass, from, to; +{ + const char *src, *dst; + + rb_secure(2); + FilePathValue(from); + FilePathValue(to); + src = StringValueCStr(from); + dst = StringValueCStr(to); + if (rename(src, dst) < 0) { +#if defined __CYGWIN__ + extern unsigned long __attribute__((stdcall)) GetLastError(); + errno = GetLastError(); /* This is a Cygwin bug */ +#elif defined DOSISH && !defined _WIN32 + if (errno == EEXIST +#if defined (__EMX__) + || errno == EACCES +#endif + ) { + if (chmod(dst, 0666) == 0 && + unlink(dst) == 0 && + rename(src, dst) == 0) + return INT2FIX(0); + } +#endif + sys_fail2(from, to); + } + + return INT2FIX(0); +} + +/* + * call-seq: + * File.umask() => integer + * File.umask(integer) => integer + * + * Returns the current umask value for this process. If the optional + * argument is given, set the umask to that value and return the + * previous value. Umask values are <em>subtracted</em> from the + * default permissions, so a umask of <code>0222</code> would make a + * file read-only for everyone. + * + * File.umask(0006) #=> 18 + * File.umask #=> 6 + */ + +static VALUE +rb_file_s_umask(argc, argv) + int argc; + VALUE *argv; +{ + int omask = 0; + + rb_secure(2); + if (argc == 0) { + omask = umask(0); + umask(omask); + } + else if (argc == 1) { + omask = umask(NUM2INT(argv[0])); + } + else { + rb_raise(rb_eArgError, "wrong number of arguments"); + } + return INT2FIX(omask); +} + +#if defined DOSISH +#define DOSISH_UNC +#define isdirsep(x) ((x) == '/' || (x) == '\\') +#else +#define isdirsep(x) ((x) == '/') +#endif +#ifndef CharNext /* defined as CharNext[AW] on Windows. 
*/ +# if defined(DJGPP) +# define CharNext(p) ((p) + mblen(p, RUBY_MBCHAR_MAXSIZE)) +# else +# define CharNext(p) ((p) + 1) +# endif +#endif + +#ifdef __CYGWIN__ +#undef DOSISH +#define DOSISH_UNC +#define DOSISH_DRIVE_LETTER +#endif + +#ifdef DOSISH_DRIVE_LETTER +static inline int +has_drive_letter(buf) + const char *buf; +{ + if (ISALPHA(buf[0]) && buf[1] == ':') { + return 1; + } + else { + return 0; + } +} + +static char* +getcwdofdrv(drv) + int drv; +{ + char drive[4]; + char *drvcwd, *oldcwd; + + drive[0] = drv; + drive[1] = ':'; + drive[2] = '\0'; + + /* the only way that I know to get the current directory + of a particular drive is to change chdir() to that drive, + so save the old cwd before chdir() + */ + oldcwd = my_getcwd(); + if (chdir(drive) == 0) { + drvcwd = my_getcwd(); + chdir(oldcwd); + free(oldcwd); + } + else { + /* perhaps the drive is not exist. we return only drive letter */ + drvcwd = strdup(drive); + } + return drvcwd; +} +#endif + +static inline char * +skiproot(path) + const char *path; +{ +#ifdef DOSISH_DRIVE_LETTER + if (has_drive_letter(path)) path += 2; +#endif + while (isdirsep(*path)) path++; + return (char *)path; +} + +#define nextdirsep rb_path_next +char * +rb_path_next(s) + const char *s; +{ + while (*s && !isdirsep(*s)) { + s = CharNext(s); + } + return (char *)s; +} + +#define skipprefix rb_path_skip_prefix +char * +rb_path_skip_prefix(path) + const char *path; +{ +#if defined(DOSISH_UNC) || defined(DOSISH_DRIVE_LETTER) +#ifdef DOSISH_UNC + if (isdirsep(path[0]) && isdirsep(path[1])) { + if (*(path = nextdirsep(path + 2))) + path = nextdirsep(path + 1); + return (char *)path; + } +#endif +#ifdef DOSISH_DRIVE_LETTER + if (has_drive_letter(path)) + return (char *)(path + 2); +#endif +#endif + return (char *)path; +} + +#define strrdirsep rb_path_last_separator +char * +rb_path_last_separator(path) + const char *path; +{ + char *last = NULL; + while (*path) { + if (isdirsep(*path)) { + const char *tmp = path++; + while 
(isdirsep(*path)) path++; + if (!*path) break; + last = (char *)tmp; + } + else { + path = CharNext(path); + } + } + return last; +} + +#define chompdirsep rb_path_end +char * +rb_path_end(path) + const char *path; +{ + while (*path) { + if (isdirsep(*path)) { + const char *last = path++; + while (isdirsep(*path)) path++; + if (!*path) return (char *)last; + } + else { + path = CharNext(path); + } + } + return (char *)path; +} + +#define BUFCHECK(cond) do {\ + long bdiff = p - buf;\ + while (cond) {\ + buflen *= 2;\ + }\ + rb_str_resize(result, buflen);\ + buf = RSTRING(result)->ptr;\ + p = buf + bdiff;\ + pend = buf + buflen;\ +} while (0) + +#define BUFINIT() (\ + p = buf = RSTRING(result)->ptr,\ + buflen = RSTRING(result)->len,\ + pend = p + buflen) + +#if !defined(TOLOWER) +#define TOLOWER(c) (ISUPPER(c) ? tolower(c) : (c)) +#endif + +static int is_absolute_path _((const char*)); + +static VALUE +file_expand_path(fname, dname, result) + VALUE fname, dname, result; +{ + char *s, *buf, *b, *p, *pend, *root; + long buflen, dirlen; + int tainted; + + s = StringValuePtr(fname); + BUFINIT(); + tainted = OBJ_TAINTED(fname); + + if (s[0] == '~') { + if (isdirsep(s[1]) || s[1] == '\0') { + char *dir = getenv("HOME"); + + if (!dir) { + rb_raise(rb_eArgError, "couldn't find HOME environment -- expanding `%s'", s); + } + dirlen = strlen(dir); + BUFCHECK(dirlen > buflen); + strcpy(buf, dir); +#if defined DOSISH || defined __CYGWIN__ + for (p = buf; *p; p = CharNext(p)) { + if (*p == '\\') { + *p = '/'; + } + } +#else + p = buf + strlen(dir); +#endif + s++; + tainted = 1; + } + else { +#ifdef HAVE_PWD_H + struct passwd *pwPtr; + s++; +#endif + s = nextdirsep(b = s); + BUFCHECK(bdiff + (s-b) >= buflen); + memcpy(p, b, s-b); + p += s-b; + *p = '\0'; +#ifdef HAVE_PWD_H + pwPtr = getpwnam(buf); + if (!pwPtr) { + endpwent(); + rb_raise(rb_eArgError, "user %s doesn't exist", buf); + } + dirlen = strlen(pwPtr->pw_dir); + BUFCHECK(dirlen > buflen); + strcpy(buf, pwPtr->pw_dir); + p 
= buf + strlen(pwPtr->pw_dir); + endpwent(); +#endif + } + } +#ifdef DOSISH_DRIVE_LETTER + /* skip drive letter */ + else if (has_drive_letter(s)) { + if (isdirsep(s[2])) { + /* specified drive letter, and full path */ + /* skip drive letter */ + BUFCHECK(bdiff + 2 >= buflen); + memcpy(p, s, 2); + p += 2; + s += 2; + } + else { + /* specified drive, but not full path */ + int same = 0; + if (!NIL_P(dname)) { + file_expand_path(dname, Qnil, result); + BUFINIT(); + if (has_drive_letter(p) && TOLOWER(p[0]) == TOLOWER(s[0])) { + /* ok, same drive */ + same = 1; + } + } + if (!same) { + char *dir = getcwdofdrv(*s); + + tainted = 1; + dirlen = strlen(dir); + BUFCHECK(dirlen > buflen); + strcpy(buf, dir); + free(dir); + } + p = chompdirsep(skiproot(buf)); + s += 2; + } + } +#endif + else if (!is_absolute_path(s)) { + if (!NIL_P(dname)) { + file_expand_path(dname, Qnil, result); + BUFINIT(); + } + else { + char *dir = my_getcwd(); + + tainted = 1; + dirlen = strlen(dir); + BUFCHECK(dirlen > buflen); + strcpy(buf, dir); + free(dir); + } +#if defined DOSISH || defined __CYGWIN__ + if (isdirsep(*s)) { + /* specified full path, but not drive letter nor UNC */ + /* we need to get the drive letter or UNC share name */ + p = skipprefix(buf); + } + else +#endif + p = chompdirsep(skiproot(buf)); + } + else { + b = s; + do s++; while (isdirsep(*s)); + p = buf + (s - b); + BUFCHECK(bdiff >= buflen); + memset(buf, '/', p - buf); + } + if (p > buf && p[-1] == '/') + --p; + else + *p = '/'; + + p[1] = 0; + root = skipprefix(buf); + + b = s; + while (*s) { + switch (*s) { + case '.': + if (b == s++) { /* beginning of path element */ + switch (*s) { + case '\0': + b = s; + break; + case '.': + if (*(s+1) == '\0' || isdirsep(*(s+1))) { + /* We must go back to the parent */ + *p = '\0'; + if (!(b = strrdirsep(root))) { + *p = '/'; + } + else { + p = b; + } + b = ++s; + } + break; + case '/': +#if defined DOSISH || defined __CYGWIN__ + case '\\': +#endif + b = ++s; + break; + default: + /* 
ordinary path element, beginning don't move */ + break; + } + } + break; + case '/': +#if defined DOSISH || defined __CYGWIN__ + case '\\': +#endif + if (s > b) { + long rootdiff = root - buf; + BUFCHECK(bdiff + (s-b+1) >= buflen); + root = buf + rootdiff; + memcpy(++p, b, s-b); + p += s-b; + *p = '/'; + } + b = ++s; + break; + default: + s = CharNext(s); + break; + } + } + + if (s > b) { + BUFCHECK(bdiff + (s-b) >= buflen); + memcpy(++p, b, s-b); + p += s-b; + } + if (p == skiproot(buf) - 1) p++; + + if (tainted) OBJ_TAINT(result); + RSTRING(result)->len = p - buf; + *p = '\0'; + return result; +} + +VALUE +rb_file_expand_path(fname, dname) + VALUE fname, dname; +{ + return file_expand_path(fname, dname, rb_str_new(0, MAXPATHLEN + 2)); +} + +/* + * call-seq: + * File.expand_path(file_name [, dir_string] ) -> abs_file_name + * + * Converts a pathname to an absolute pathname. Relative paths are + * referenced from the current working directory of the process unless + * <i>dir_string</i> is given, in which case it will be used as the + * starting point. The given pathname may start with a + * ``<code>~</code>'', which expands to the process owner's home + * directory (the environment variable <code>HOME</code> must be set + * correctly). ``<code>~</code><i>user</i>'' expands to the named + * user's home directory. 
+ * + * File.expand_path("~oracle/bin") #=> "/home/oracle/bin" + * File.expand_path("../../bin", "/tmp/x") #=> "/bin" + */ + +VALUE +rb_file_s_expand_path(argc, argv) + int argc; + VALUE *argv; +{ + VALUE fname, dname; + + if (argc == 1) { + return rb_file_expand_path(argv[0], Qnil); + } + rb_scan_args(argc, argv, "11", &fname, &dname); + + return rb_file_expand_path(fname, dname); +} + +static int +rmext(p, e) + const char *p, *e; +{ + int l1, l2; + + if (!e) return 0; + + l1 = chompdirsep(p) - p; + l2 = strlen(e); + if (l2 == 2 && e[1] == '*') { + e = strrchr(p, *e); + if (!e) return 0; + return e - p; + } + if (l1 < l2) return l1; + + if (strncmp(p+l1-l2, e, l2) == 0) { + return l1-l2; + } + return 0; +} + +/* + * call-seq: + * File.basename(file_name [, suffix] ) -> base_name + * + * Returns the last component of the filename given in <i>file_name</i>, + * which must be formed using forward slashes (``<code>/</code>'') + * regardless of the separator used on the local file system. If + * <i>suffix</i> is given and present at the end of <i>file_name</i>, + * it is removed. 
+ * + * File.basename("/home/gumby/work/ruby.rb") #=> "ruby.rb" + * File.basename("/home/gumby/work/ruby.rb", ".rb") #=> "ruby" + */ + +static VALUE +rb_file_s_basename(argc, argv) + int argc; + VALUE *argv; +{ + VALUE fname, fext, basename; + char *name, *p; + int f; + + if (rb_scan_args(argc, argv, "11", &fname, &fext) == 2) { + StringValue(fext); + } + StringValue(fname); + if (RSTRING(fname)->len == 0 || !*(name = RSTRING(fname)->ptr)) + return fname; + if (!*(name = skiproot(name))) { + p = name - 1; + f = 1; +#ifdef DOSISH_DRIVE_LETTER + if (*p == ':') { + p++; + f = 0; + } +#endif + } + else if (!(p = strrdirsep(name))) { + if (NIL_P(fext) || !(f = rmext(name, StringValueCStr(fext)))) { + f = chompdirsep(name) - name; + if (f == RSTRING(fname)->len) return fname; + } + p = name; + } + else { + while (isdirsep(*p)) p++; /* skip last / */ + if (NIL_P(fext) || !(f = rmext(p, StringValueCStr(fext)))) { + f = chompdirsep(p) - p; + } + } + basename = rb_str_new(p, f); + OBJ_INFECT(basename, fname); + return basename; +} + +/* + * call-seq: + * File.dirname(file_name ) -> dir_name + * + * Returns all components of the filename given in <i>file_name</i> + * except the last one. The filename must be formed using forward + * slashes (``<code>/</code>'') regardless of the separator used on the + * local file system. 
+ * + * File.dirname("/home/gumby/work/ruby.rb") #=> "/home/gumby/work" + */ + +static VALUE +rb_file_s_dirname(klass, fname) + VALUE klass, fname; +{ + char *name, *root, *p; + VALUE dirname; + + name = StringValueCStr(fname); + root = skiproot(name); +#ifdef DOSISH_UNC + if (root > name + 2 && isdirsep(*name)) + name = root - 2; +#else + if (root > name + 1) + name = root - 1; +#endif + p = strrdirsep(root); + if (!p) { + p = root; + } + if (p == name) + return rb_str_new2("."); + dirname = rb_str_new(name, p - name); +#ifdef DOSISH_DRIVE_LETTER + if (root == name + 2 && name[1] == ':') + rb_str_cat(dirname, ".", 1); +#endif + OBJ_INFECT(dirname, fname); + return dirname; +} + +/* + * call-seq: + * File.extname(path) -> string + * + * Returns the extension (the portion of file name in <i>path</i> + * after the period). + * + * File.extname("test.rb") #=> ".rb" + * File.extname("a/b/d/test.rb") #=> ".rb" + * File.extname("test") #=> "" + * File.extname(".profile") #=> "" + * + */ + +static VALUE +rb_file_s_extname(klass, fname) + VALUE klass, fname; +{ + char *name, *p, *e; + VALUE extname; + + name = StringValueCStr(fname); + p = strrdirsep(name); /* get the last path component */ + if (!p) + p = name; + else + p++; + + e = strrchr(p, '.'); /* get the last dot of the last component */ + if (!e || e == p) /* no dot, or the only dot is first? */ + return rb_str_new2(""); + extname = rb_str_new(e, chompdirsep(e) - e); /* keep the dot, too! */ + OBJ_INFECT(extname, fname); + return extname; +} + +/* + * call-seq: + * File.path(path) -> string + * + * Returns the string representation of the path + * + * File.path("/dev/null") #=> "/dev/null" + * File.path(Pathname.new("/tmp")) #=> "/tmp" + * + */ + +static VALUE +rb_file_s_path(klass, fname) + VALUE klass, fname; +{ + return rb_get_path(fname); +} + +/* + * call-seq: + * File.split(file_name) => array + * + * Splits the given string into a directory and a file component and + * returns them in a two-element array. 
See also + * <code>File::dirname</code> and <code>File::basename</code>. + * + * File.split("/home/gumby/.profile") #=> ["/home/gumby", ".profile"] + */ + +static VALUE +rb_file_s_split(klass, path) + VALUE klass, path; +{ + StringValue(path); /* get rid of converting twice */ + return rb_assoc_new(rb_file_s_dirname(Qnil, path), rb_file_s_basename(1,&path)); +} + +static VALUE separator; + +static VALUE rb_file_join _((VALUE ary, VALUE sep)); + +static VALUE +file_inspect_join(ary, arg, recur) + VALUE ary; + VALUE *arg; +{ + if (recur) return rb_str_new2("[...]"); + return rb_file_join(arg[0], arg[1]); +} + +static VALUE +rb_file_join(ary, sep) + VALUE ary, sep; +{ + long len, i; + int taint = 0; + VALUE result, tmp; + char *name; + + if (RARRAY(ary)->len == 0) return rb_str_new(0, 0); + if (OBJ_TAINTED(ary)) taint = 1; + if (OBJ_TAINTED(sep)) taint = 1; + + len = 1; + for (i=0; i<RARRAY(ary)->len; i++) { + if (TYPE(RARRAY(ary)->ptr[i]) == T_STRING) { + len += RSTRING(RARRAY(ary)->ptr[i])->len; + } + else { + len += 10; + } + } + if (!NIL_P(sep) && TYPE(sep) == T_STRING) { + len += RSTRING(sep)->len * RARRAY(ary)->len - 1; + } + result = rb_str_buf_new(len); + for (i=0; i<RARRAY(ary)->len; i++) { + tmp = RARRAY(ary)->ptr[i]; + switch (TYPE(tmp)) { + case T_STRING: + break; + case T_ARRAY: + { + VALUE args[2]; + + args[0] = tmp; + args[1] = sep; + tmp = rb_exec_recursive(file_inspect_join, ary, (VALUE)args); + } + break; + default: + tmp = rb_obj_as_string(tmp); + } + name = StringValueCStr(result); + if (i > 0 && !NIL_P(sep) && !*chompdirsep(name)) + rb_str_buf_append(result, sep); + rb_str_buf_append(result, tmp); + if (OBJ_TAINTED(tmp)) taint = 1; + } + + if (taint) OBJ_TAINT(result); + return result; +} + +/* + * call-seq: + * File.join(string, ...) -> path + * + * Returns a new string formed by joining the strings using + * <code>File::SEPARATOR</code>. 
+ * + * File.join("usr", "mail", "gumby") #=> "usr/mail/gumby" + * + */ + +static VALUE +rb_file_s_join(klass, args) + VALUE klass, args; +{ + return rb_file_join(args, separator); +} + +/* + * call-seq: + * File.truncate(file_name, integer) => 0 + * + * Truncates the file <i>file_name</i> to be at most <i>integer</i> + * bytes long. Not available on all platforms. + * + * f = File.new("out", "w") + * f.write("1234567890") #=> 10 + * f.close #=> nil + * File.truncate("out", 5) #=> 0 + * File.size("out") #=> 5 + * + */ + +static VALUE +rb_file_s_truncate(klass, path, len) + VALUE klass, path, len; +{ + off_t pos; + + rb_secure(2); + pos = NUM2OFFT(len); + FilePathValue(path); +#ifdef HAVE_TRUNCATE + if (truncate(StringValueCStr(path), pos) < 0) + rb_sys_fail(RSTRING(path)->ptr); +#else +# ifdef HAVE_CHSIZE + { + int tmpfd; + +# ifdef _WIN32 + if ((tmpfd = open(StringValueCStr(path), O_RDWR)) < 0) { + rb_sys_fail(RSTRING(path)->ptr); + } +# else + if ((tmpfd = open(StringValueCStr(path), 0)) < 0) { + rb_sys_fail(RSTRING(path)->ptr); + } +# endif + if (chsize(tmpfd, pos) < 0) { + close(tmpfd); + rb_sys_fail(RSTRING(path)->ptr); + } + close(tmpfd); + } +# else + rb_notimplement(); +# endif +#endif + return INT2FIX(0); +} + +/* + * call-seq: + * file.truncate(integer) => 0 + * + * Truncates <i>file</i> to at most <i>integer</i> bytes. The file + * must be opened for writing. Not available on all platforms. 
+ * + * f = File.new("out", "w") + * f.syswrite("1234567890") #=> 10 + * f.truncate(5) #=> 0 + * f.close() #=> nil + * File.size("out") #=> 5 + */ + +static VALUE +rb_file_truncate(obj, len) + VALUE obj, len; +{ + OpenFile *fptr; + off_t pos; + + rb_secure(2); + pos = NUM2OFFT(len); + GetOpenFile(obj, fptr); + if (!(fptr->mode & FMODE_WRITABLE)) { + rb_raise(rb_eIOError, "not opened for writing"); + } + rb_io_flush(obj); +#ifdef HAVE_TRUNCATE + if (ftruncate(fptr->fd, pos) < 0) + rb_sys_fail(fptr->path); +#else +# ifdef HAVE_CHSIZE + if (chsize(fptr->fd, pos) < 0) + rb_sys_fail(fptr->path); +# else + rb_notimplement(); +# endif +#endif + return INT2FIX(0); +} + +# ifndef LOCK_SH +# define LOCK_SH 1 +# endif +# ifndef LOCK_EX +# define LOCK_EX 2 +# endif +# ifndef LOCK_NB +# define LOCK_NB 4 +# endif +# ifndef LOCK_UN +# define LOCK_UN 8 +# endif + +#if 1 +static int +rb_thread_flock(fd, op, fptr) + int fd, op; + OpenFile *fptr; +{ + if (rb_thread_alone() || (op & LOCK_NB)) { + return flock(fd, op); + } + op |= LOCK_NB; + while (flock(fd, op) < 0) { + switch (errno) { + case EAGAIN: + case EACCES: +#if defined(EWOULDBLOCK) && EWOULDBLOCK != EAGAIN + case EWOULDBLOCK: +#endif + rb_thread_polling(); /* busy wait */ + rb_io_check_closed(fptr); + continue; + default: + return -1; + } + } + return 0; +} +#define flock(fd, op) rb_thread_flock(fd, op, fptr) +#endif + +/* + * call-seq: + * file.flock (locking_constant ) => 0 or false + * + * Locks or unlocks a file according to <i>locking_constant</i> (a + * logical <em>or</em> of the values in the table below). + * Returns <code>false</code> if <code>File::LOCK_NB</code> is + * specified and the operation would otherwise have blocked. Not + * available on all platforms. + * + * Locking constants (in class File): + * + * LOCK_EX | Exclusive lock. Only one process may hold an + * | exclusive lock for a given file at a time. 
+ * ----------+------------------------------------------------ + * LOCK_NB | Don't block when locking. May be combined + * | with other lock options using logical or. + * ----------+------------------------------------------------ + * LOCK_SH | Shared lock. Multiple processes may each hold a + * | shared lock for a given file at the same time. + * ----------+------------------------------------------------ + * LOCK_UN | Unlock. + * + * Example: + * + * File.new("testfile").flock(File::LOCK_UN) #=> 0 + * + */ + +static VALUE +rb_file_flock(obj, operation) + VALUE obj; + VALUE operation; +{ +#ifndef __CHECKER__ + OpenFile *fptr; + int op; + + rb_secure(2); + op = NUM2INT(operation); + GetOpenFile(obj, fptr); + + if (fptr->mode & FMODE_WRITABLE) { + rb_io_flush(obj); + } + retry: + if (flock(fptr->fd, op) < 0) { + switch (errno) { + case EAGAIN: + case EACCES: +#if defined(EWOULDBLOCK) && EWOULDBLOCK != EAGAIN + case EWOULDBLOCK: +#endif + return Qfalse; + case EINTR: +#if defined(ERESTART) + case ERESTART: +#endif + goto retry; + } + rb_sys_fail(fptr->path); + } +#endif + return INT2FIX(0); +} +#undef flock + +static void +test_check(n, argc, argv) + int n, argc; + VALUE *argv; +{ + int i; + + rb_secure(2); + n+=1; + if (n != argc) rb_raise(rb_eArgError, "wrong number of arguments (%d for %d)", argc, n); + for (i=1; i<n; i++) { + switch (TYPE(argv[i])) { + case T_STRING: + default: + FilePathValue(argv[i]); + break; + case T_FILE: + break; + } + } +} + +#define CHECK(n) test_check((n), argc, argv) + +/* + * call-seq: + * test(int_cmd, file1 [, file2] ) => obj + * + * Uses the integer <i>aCmd</i> to perform various tests on + * <i>file1</i> (first table below) or on <i>file1</i> and + * <i>file2</i> (second table). 
+ * + * File tests on a single file: + * + * Test Returns Meaning + * ?A | Time | Last access time for file1 + * ?b | boolean | True if file1 is a block device + * ?c | boolean | True if file1 is a character device + * ?C | Time | Last change time for file1 + * ?d | boolean | True if file1 exists and is a directory + * ?e | boolean | True if file1 exists + * ?f | boolean | True if file1 exists and is a regular file + * ?g | boolean | True if files has the \CF{setgid} bit + * | | set (false under NT) + * ?G | boolean | True if file1 exists and has a group + * | | ownership equal to the caller's group + * ?k | boolean | True if file1 exists and has the sticky bit set + * ?l | boolean | True if files exists and is a symbolic link + * ?M | Time | Last modification time for file1 + * ?o | boolean | True if files exists and is owned by + * | | the caller's effective uid + * ?O | boolean | True if file1 exists and is owned by + * | | the caller's real uid + * ?p | boolean | True if file1 exists and is a fifo + * ?r | boolean | True if file1 is readable by the effective + * | | uid/gid of the caller + * ?R | boolean | True if file is readable by the real + * | | uid/gid of the caller + * ?s | int/nil | If files has nonzero size, return the size, + * | | otherwise return nil + * ?S | boolean | True if file1 exists and is a socket + * ?u | boolean | True if file1 has the setuid bit set + * ?w | boolean | True if file1 exists and is writable by + * | | the effective uid/gid + * ?W | boolean | True if file1 exists and is writable by + * | | the real uid/gid + * ?x | boolean | True if file1 exists and is executable by + * | | the effective uid/gid + * ?X | boolean | True if file1 exists and is executable by + * | | the real uid/gid + * ?z | boolean | True if file1 exists and has a zero length + * + * Tests that take two files: + * + * ?- | boolean | True if file1 is a hard link to file2 + * ?= | boolean | True if the modification times of file1 + * | | and file2 are equal + * 
?< | boolean | True if the modification time of file1 + * | | is prior to that of file2 + * ?> | boolean | True if the modification time of file1 + * | | is after that of file2 + */ + +static VALUE +rb_f_test(argc, argv) + int argc; + VALUE *argv; +{ + int cmd; + + if (argc == 0) rb_raise(rb_eArgError, "wrong number of arguments"); +#if 0 /* 1.7 behavior? */ + if (argc == 1) { + return RTEST(argv[0]) ? Qtrue : Qfalse; + } +#endif + cmd = NUM2CHR(argv[0]); + if (cmd == 0) return Qfalse; + if (strchr("bcdefgGkloOprRsSuwWxXz", cmd)) { + CHECK(1); + switch (cmd) { + case 'b': + return test_b(0, argv[1]); + + case 'c': + return test_c(0, argv[1]); + + case 'd': + return test_d(0, argv[1]); + + case 'a': + case 'e': + return test_e(0, argv[1]); + + case 'f': + return test_f(0, argv[1]); + + case 'g': + return test_sgid(0, argv[1]); + + case 'G': + return test_grpowned(0, argv[1]); + + case 'k': + return test_sticky(0, argv[1]); + + case 'l': + return test_l(0, argv[1]); + + case 'o': + return test_owned(0, argv[1]); + + case 'O': + return test_rowned(0, argv[1]); + + case 'p': + return test_p(0, argv[1]); + + case 'r': + return test_r(0, argv[1]); + + case 'R': + return test_R(0, argv[1]); + + case 's': + return test_s(0, argv[1]); + + case 'S': + return test_S(0, argv[1]); + + case 'u': + return test_suid(0, argv[1]); + + case 'w': + return test_w(0, argv[1]); + + case 'W': + return test_W(0, argv[1]); + + case 'x': + return test_x(0, argv[1]); + + case 'X': + return test_X(0, argv[1]); + + case 'z': + return test_z(0, argv[1]); + } + } + + if (strchr("MAC", cmd)) { + struct stat st; + + CHECK(1); + if (rb_stat(argv[1], &st) == -1) { + rb_sys_fail(RSTRING(argv[1])->ptr); + } + + switch (cmd) { + case 'A': + return rb_time_new(st.st_atime, 0); + case 'M': + return rb_time_new(st.st_mtime, 0); + case 'C': + return rb_time_new(st.st_ctime, 0); + } + } + + if (strchr("-=<>", cmd)) { + struct stat st1, st2; + + CHECK(2); + if (rb_stat(argv[1], &st1) < 0) return Qfalse; + if 
(rb_stat(argv[2], &st2) < 0) return Qfalse; + + switch (cmd) { + case '-': + if (st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino) + return Qtrue; + return Qfalse; + + case '=': + if (st1.st_mtime == st2.st_mtime) return Qtrue; + return Qfalse; + + case '>': + if (st1.st_mtime > st2.st_mtime) return Qtrue; + return Qfalse; + + case '<': + if (st1.st_mtime < st2.st_mtime) return Qtrue; + return Qfalse; + } + } + /* unknown command */ + rb_raise(rb_eArgError, "unknown command ?%c", cmd); + return Qnil; /* not reached */ +} + + + +/* + * Document-class: File::Stat + * + * Objects of class <code>File::Stat</code> encapsulate common status + * information for <code>File</code> objects. The information is + * recorded at the moment the <code>File::Stat</code> object is + * created; changes made to the file after that point will not be + * reflected. <code>File::Stat</code> objects are returned by + * <code>IO#stat</code>, <code>File::stat</code>, + * <code>File#lstat</code>, and <code>File::lstat</code>. Many of these + * methods return platform-specific values, and not all values are + * meaningful on all systems. See also <code>Kernel#test</code>. + */ + +static VALUE rb_stat_s_alloc _((VALUE)); +static VALUE +rb_stat_s_alloc(klass) + VALUE klass; +{ + return stat_new_0(klass, 0); +} + +/* + * call-seq: + * + * File::Stat.new(file_name) => stat + * + * Create a File::Stat object for the given file name (raising an + * exception if the file doesn't exist). 
+ */ + +static VALUE +rb_stat_init(obj, fname) + VALUE obj, fname; +{ + struct stat st, *nst; + + rb_secure(2); + FilePathValue(fname); + if (stat(StringValueCStr(fname), &st) == -1) { + rb_sys_fail(RSTRING(fname)->ptr); + } + if (DATA_PTR(obj)) { + free(DATA_PTR(obj)); + DATA_PTR(obj) = NULL; + } + nst = ALLOC(struct stat); + *nst = st; + DATA_PTR(obj) = nst; + + return Qnil; +} + +/* :nodoc: */ +static VALUE +rb_stat_init_copy(copy, orig) + VALUE copy, orig; +{ + struct stat *nst; + + if (copy == orig) return orig; + rb_check_frozen(copy); + /* need better argument type check */ + if (!rb_obj_is_instance_of(orig, rb_obj_class(copy))) { + rb_raise(rb_eTypeError, "wrong argument class"); + } + if (DATA_PTR(copy)) { + free(DATA_PTR(copy)); + DATA_PTR(copy) = 0; + } + if (DATA_PTR(orig)) { + nst = ALLOC(struct stat); + *nst = *(struct stat*)DATA_PTR(orig); + DATA_PTR(copy) = nst; + } + + return copy; +} + +/* + * call-seq: + * stat.ftype => string + * + * Identifies the type of <i>stat</i>. The return string is one of: + * ``<code>file</code>'', ``<code>directory</code>'', + * ``<code>characterSpecial</code>'', ``<code>blockSpecial</code>'', + * ``<code>fifo</code>'', ``<code>link</code>'', + * ``<code>socket</code>'', or ``<code>unknown</code>''. + * + * File.stat("/dev/tty").ftype #=> "characterSpecial" + * + */ + +static VALUE +rb_stat_ftype(obj) + VALUE obj; +{ + return rb_file_ftype(get_stat(obj)); +} + +/* + * call-seq: + * stat.directory? => true or false + * + * Returns <code>true</code> if <i>stat</i> is a directory, + * <code>false</code> otherwise. + * + * File.stat("testfile").directory? #=> false + * File.stat(".").directory? #=> true + */ + +static VALUE +rb_stat_d(obj) + VALUE obj; +{ + if (S_ISDIR(get_stat(obj)->st_mode)) return Qtrue; + return Qfalse; +} + +/* + * call-seq: + * stat.pipe? => true or false + * + * Returns <code>true</code> if the operating system supports pipes and + * <i>stat</i> is a pipe; <code>false</code> otherwise. 
+ */ + +static VALUE +rb_stat_p(obj) + VALUE obj; +{ +#ifdef S_IFIFO + if (S_ISFIFO(get_stat(obj)->st_mode)) return Qtrue; + +#endif + return Qfalse; +} + +/* + * call-seq: + * stat.symlink? => true or false + * + * Returns <code>true</code> if <i>stat</i> is a symbolic link, + * <code>false</code> if it isn't or if the operating system doesn't + * support this feature. As <code>File::stat</code> automatically + * follows symbolic links, <code>symlink?</code> will always be + * <code>false</code> for an object returned by + * <code>File::stat</code>. + * + * File.symlink("testfile", "alink") #=> 0 + * File.stat("alink").symlink? #=> false + * File.lstat("alink").symlink? #=> true + * + */ + +static VALUE +rb_stat_l(obj) + VALUE obj; +{ +#ifdef S_ISLNK + if (S_ISLNK(get_stat(obj)->st_mode)) return Qtrue; +#endif + return Qfalse; +} + +/* + * call-seq: + * stat.socket? => true or false + * + * Returns <code>true</code> if <i>stat</i> is a socket, + * <code>false</code> if it isn't or if the operating system doesn't + * support this feature. + * + * File.stat("testfile").socket? #=> false + * + */ + +static VALUE +rb_stat_S(obj) + VALUE obj; +{ +#ifdef S_ISSOCK + if (S_ISSOCK(get_stat(obj)->st_mode)) return Qtrue; + +#endif + return Qfalse; +} + +/* + * call-seq: + * stat.blockdev? => true or false + * + * Returns <code>true</code> if the file is a block device, + * <code>false</code> if it isn't or if the operating system doesn't + * support this feature. + * + * File.stat("testfile").blockdev? #=> false + * File.stat("/dev/hda1").blockdev? #=> true + * + */ + +static VALUE +rb_stat_b(obj) + VALUE obj; +{ +#ifdef S_ISBLK + if (S_ISBLK(get_stat(obj)->st_mode)) return Qtrue; + +#endif + return Qfalse; +} + +/* + * call-seq: + * stat.chardev? => true or false + * + * Returns <code>true</code> if the file is a character device, + * <code>false</code> if it isn't or if the operating system doesn't + * support this feature. + * + * File.stat("/dev/tty").chardev? 
#=> true + * + */ + +static VALUE +rb_stat_c(obj) + VALUE obj; +{ + if (S_ISCHR(get_stat(obj)->st_mode)) return Qtrue; + + return Qfalse; +} + +/* + * call-seq: + * stat.owned? => true or false + * + * Returns <code>true</code> if the effective user id of the process is + * the same as the owner of <i>stat</i>. + * + * File.stat("testfile").owned? #=> true + * File.stat("/etc/passwd").owned? #=> false + * + */ + +static VALUE +rb_stat_owned(obj) + VALUE obj; +{ + if (get_stat(obj)->st_uid == geteuid()) return Qtrue; + return Qfalse; +} + +static VALUE +rb_stat_rowned(obj) + VALUE obj; +{ + if (get_stat(obj)->st_uid == getuid()) return Qtrue; + return Qfalse; +} + +/* + * call-seq: + * stat.grpowned? => true or false + * + * Returns true if the effective group id of the process is the same as + * the group id of <i>stat</i>. On Windows NT, returns <code>false</code>. + * + * File.stat("testfile").grpowned? #=> true + * File.stat("/etc/passwd").grpowned? #=> false + * + */ + +static VALUE +rb_stat_grpowned(obj) + VALUE obj; +{ +#ifndef _WIN32 + if (get_stat(obj)->st_gid == getegid()) return Qtrue; +#endif + return Qfalse; +} + +/* + * call-seq: + * stat.readable? => true or false + * + * Returns <code>true</code> if <i>stat</i> is readable by the + * effective user id of this process. + * + * File.stat("testfile").readable? #=> true + * + */ + +static VALUE +rb_stat_r(obj) + VALUE obj; +{ + struct stat *st = get_stat(obj); + +#ifdef S_IRUSR + if (rb_stat_owned(obj)) + return st->st_mode & S_IRUSR ? Qtrue : Qfalse; +#endif +#ifdef S_IRGRP + if (rb_stat_grpowned(obj)) + return st->st_mode & S_IRGRP ? Qtrue : Qfalse; +#endif +#ifdef S_IROTH + if (!(st->st_mode & S_IROTH)) return Qfalse; +#endif + return Qtrue; +} + + + +/* + * call-seq: + * stat.readable_real? -> true or false + * + * Returns <code>true</code> if <i>stat</i> is readable by the real + * user id of this process. + * + * File.stat("testfile").readable_real? 
#=> true + * + */ + +static VALUE +rb_stat_R(obj) + VALUE obj; +{ + struct stat *st = get_stat(obj); + +#ifdef S_IRUSR + if (rb_stat_rowned(obj)) + return st->st_mode & S_IRUSR ? Qtrue : Qfalse; +#endif +#ifdef S_IRGRP + if (group_member(get_stat(obj)->st_gid)) + return st->st_mode & S_IRGRP ? Qtrue : Qfalse; +#endif +#ifdef S_IROTH + if (!(st->st_mode & S_IROTH)) return Qfalse; +#endif + return Qtrue; +} + +/* + * call-seq: + * stat.world_readable? => fixnum or nil + * + * If <i>stat</i> is readable by others, returns an integer + * representing the file permission bits of <i>stat</i>. Returns + * <code>nil</code> otherwise. The meaning of the bits is platform + * dependent; on Unix systems, see <code>stat(2)</code>. + * + * m = File.stat("/etc/passwd").world_readable? # => 420 + * sprintf("%o", m) # => "644" + */ + +static VALUE +rb_stat_wr(obj) + VALUE obj; +{ +#ifdef S_IROTH + if ((get_stat(obj)->st_mode & (S_IROTH)) == S_IROTH) { + return UINT2NUM(get_stat(obj)->st_mode & (S_IRUGO|S_IWUGO|S_IXUGO)); + } + else { + return Qnil; + } +#endif +} + +/* + * call-seq: + * stat.writable? -> true or false + * + * Returns <code>true</code> if <i>stat</i> is writable by the + * effective user id of this process. + * + * File.stat("testfile").writable? #=> true + * + */ + +static VALUE +rb_stat_w(obj) + VALUE obj; +{ + struct stat *st = get_stat(obj); + +#ifdef S_IWUSR + if (rb_stat_owned(obj)) + return st->st_mode & S_IWUSR ? Qtrue : Qfalse; +#endif +#ifdef S_IWGRP + if (rb_stat_grpowned(obj)) + return st->st_mode & S_IWGRP ? Qtrue : Qfalse; +#endif +#ifdef S_IWOTH + if (!(st->st_mode & S_IWOTH)) return Qfalse; +#endif + return Qtrue; +} + +/* + * call-seq: + * stat.writable_real? -> true or false + * + * Returns <code>true</code> if <i>stat</i> is writable by the real + * user id of this process. + * + * File.stat("testfile").writable_real? 
#=> true + * + */ + +static VALUE +rb_stat_W(obj) + VALUE obj; +{ + struct stat *st = get_stat(obj); + +#ifdef S_IWUSR + if (rb_stat_rowned(obj)) + return st->st_mode & S_IWUSR ? Qtrue : Qfalse; +#endif +#ifdef S_IWGRP + if (group_member(get_stat(obj)->st_gid)) + return st->st_mode & S_IWGRP ? Qtrue : Qfalse; +#endif +#ifdef S_IWOTH + if (!(st->st_mode & S_IWOTH)) return Qfalse; +#endif + return Qtrue; +} + +/* + * call-seq: + * stat.world_writable? => fixnum or nil + * + * If <i>stat</i> is writable by others, returns an integer + * representing the file permission bits of <i>stat</i>. Returns + * <code>nil</code> otherwise. The meaning of the bits is platform + * dependent; on Unix systems, see <code>stat(2)</code>. + * + * m = File.stat("/tmp").world_writable? # => 511 + * sprintf("%o", m) # => "777" + */ + +static VALUE +rb_stat_ww(obj) + VALUE obj; +{ +#ifdef S_IROTH + if ((get_stat(obj)->st_mode & (S_IWOTH)) == S_IWOTH) { + return UINT2NUM(get_stat(obj)->st_mode & (S_IRUGO|S_IWUGO|S_IXUGO)); + } + else { + return Qnil; + } +#endif +} + +/* + * call-seq: + * stat.executable? => true or false + * + * Returns <code>true</code> if <i>stat</i> is executable or if the + * operating system doesn't distinguish executable files from + * nonexecutable files. The tests are made using the effective owner of + * the process. + * + * File.stat("testfile").executable? #=> false + * + */ + +static VALUE +rb_stat_x(obj) + VALUE obj; +{ + struct stat *st = get_stat(obj); + +#ifdef S_IXUSR + if (rb_stat_owned(obj)) + return st->st_mode & S_IXUSR ? Qtrue : Qfalse; +#endif +#ifdef S_IXGRP + if (rb_stat_grpowned(obj)) + return st->st_mode & S_IXGRP ? Qtrue : Qfalse; +#endif +#ifdef S_IXOTH + if (!(st->st_mode & S_IXOTH)) return Qfalse; +#endif + return Qtrue; +} + +/* + * call-seq: + * stat.executable_real? => true or false + * + * Same as <code>executable?</code>, but tests using the real owner of + * the process. 
+ */ + + +static VALUE +rb_stat_X(obj) + VALUE obj; +{ + struct stat *st = get_stat(obj); + +#ifdef S_IXUSR + if (rb_stat_rowned(obj)) + return st->st_mode & S_IXUSR ? Qtrue : Qfalse; +#endif +#ifdef S_IXGRP + if (group_member(get_stat(obj)->st_gid)) + return st->st_mode & S_IXGRP ? Qtrue : Qfalse; +#endif +#ifdef S_IXOTH + if (!(st->st_mode & S_IXOTH)) return Qfalse; +#endif + return Qtrue; +} + +/* + * call-seq: + * stat.file? => true or false + * + * Returns <code>true</code> if <i>stat</i> is a regular file (not + * a device file, pipe, socket, etc.). + * + * File.stat("testfile").file? #=> true + * + */ + +static VALUE +rb_stat_f(obj) + VALUE obj; +{ + if (S_ISREG(get_stat(obj)->st_mode)) return Qtrue; + return Qfalse; +} + +/* + * call-seq: + * stat.zero? => true or false + * + * Returns <code>true</code> if <i>stat</i> is a zero-length file; + * <code>false</code> otherwise. + * + * File.stat("testfile").zero? #=> false + * + */ + +static VALUE +rb_stat_z(obj) + VALUE obj; +{ + if (get_stat(obj)->st_size == 0) return Qtrue; + return Qfalse; +} + + +/* + * call-seq: + * state.size => integer + * + * Returns the size of <i>stat</i> in bytes. + * + * File.stat("testfile").size #=> 66 + * + */ + +static VALUE +rb_stat_s(obj) + VALUE obj; +{ + off_t size = get_stat(obj)->st_size; + + if (size == 0) return Qnil; + return OFFT2NUM(size); +} + +/* + * call-seq: + * stat.setuid? => true or false + * + * Returns <code>true</code> if <i>stat</i> has the set-user-id + * permission bit set, <code>false</code> if it doesn't or if the + * operating system doesn't support this feature. + * + * File.stat("/bin/su").setuid? #=> true + */ + +static VALUE +rb_stat_suid(obj) + VALUE obj; +{ +#ifdef S_ISUID + if (get_stat(obj)->st_mode & S_ISUID) return Qtrue; +#endif + return Qfalse; +} + +/* + * call-seq: + * stat.setgid? 
=> true or false + * + * Returns <code>true</code> if <i>stat</i> has the set-group-id + * permission bit set, <code>false</code> if it doesn't or if the + * operating system doesn't support this feature. + * + * File.stat("/usr/sbin/lpc").setgid? #=> true + * + */ + +static VALUE +rb_stat_sgid(obj) + VALUE obj; +{ +#ifdef S_ISGID + if (get_stat(obj)->st_mode & S_ISGID) return Qtrue; +#endif + return Qfalse; +} + +/* + * call-seq: + * stat.sticky? => true or false + * + * Returns <code>true</code> if <i>stat</i> has its sticky bit set, + * <code>false</code> if it doesn't or if the operating system doesn't + * support this feature. + * + * File.stat("testfile").sticky? #=> false + * + */ + +static VALUE +rb_stat_sticky(obj) + VALUE obj; +{ +#ifdef S_ISVTX + if (get_stat(obj)->st_mode & S_ISVTX) return Qtrue; +#endif + return Qfalse; +} + +static VALUE rb_mFConst; + +void +rb_file_const(name, value) + const char *name; + VALUE value; +{ + rb_define_const(rb_mFConst, name, value); +} + +static int +is_absolute_path(path) + const char *path; +{ +#ifdef DOSISH_DRIVE_LETTER + if (has_drive_letter(path) && isdirsep(path[2])) return 1; +#endif +#ifdef DOSISH_UNC + if (isdirsep(path[0]) && isdirsep(path[1])) return 1; +#endif +#ifndef DOSISH + if (path[0] == '/') return 1; +#endif + return 0; +} + +#ifndef DOSISH +static int +path_check_1(path) + VALUE path; +{ + struct stat st; + char *p0 = StringValueCStr(path); + char *p = 0, *s; + + if (!is_absolute_path(p0)) { + char *buf = my_getcwd(); + VALUE newpath; + + newpath = rb_str_new2(buf); + free(buf); + + rb_str_cat2(newpath, "/"); + rb_str_cat2(newpath, p0); + return path_check_1(newpath); + } + for (;;) { +#ifndef S_IWOTH +# define S_IWOTH 002 +#endif + if (stat(p0, &st) == 0 && S_ISDIR(st.st_mode) && (st.st_mode & S_IWOTH) +#ifdef S_ISVTX + && !(st.st_mode & S_ISVTX) +#endif + && !access(p0, W_OK)) { + rb_warn("Insecure world writable dir %s, mode 0%o", p0, st.st_mode); + if (p) *p = '/'; + return 0; + } + s = 
strrdirsep(p0); + if (p) *p = '/'; + if (!s || s == p0) return 1; + p = s; + *p = '\0'; + } +} +#endif + +int +rb_path_check(path) + char *path; +{ +#ifndef DOSISH + char *p0, *p, *pend; + const char sep = PATH_SEP_CHAR; + + if (!path) return 1; + + pend = path + strlen(path); + p0 = path; + p = strchr(path, sep); + if (!p) p = pend; + + for (;;) { + if (!path_check_1(rb_str_new(p0, p - p0))) { + return 0; /* not safe */ + } + p0 = p + 1; + if (p0 > pend) break; + p = strchr(p0, sep); + if (!p) p = pend; + } +#endif + return 1; +} + +#if defined(__MACOS__) || defined(riscos) +static int +is_macos_native_path(path) + const char *path; +{ + if (strchr(path, ':')) return 1; + return 0; +} +#endif + +static int +file_load_ok(file) + char *file; +{ + FILE *f; + + if (!file) return 0; + f = fopen(file, "r"); + if (f == NULL) return 0; + fclose(f); + return 1; +} + +extern VALUE rb_load_path; + +int +rb_find_file_ext(filep, ext) + VALUE *filep; + const char * const *ext; +{ + char *path, *found; + char *f = RSTRING(*filep)->ptr; + VALUE fname; + long i, j; + + if (f[0] == '~') { + fname = rb_file_expand_path(*filep, Qnil); + if (rb_safe_level() >= 2 && OBJ_TAINTED(fname)) { + rb_raise(rb_eSecurityError, "loading from unsafe file %s", f); + } + OBJ_FREEZE(fname); + f = StringValueCStr(fname); + *filep = fname; + } + + if (is_absolute_path(f)) { + for (i=0; ext[i]; i++) { + fname = rb_str_dup(*filep); + rb_str_cat2(fname, ext[i]); + OBJ_FREEZE(fname); + if (file_load_ok(StringValueCStr(fname))) { + *filep = fname; + return i+1; + } + } + return 0; + } + + if (!rb_load_path) return 0; + + Check_Type(rb_load_path, T_ARRAY); + for (i=0;i<RARRAY(rb_load_path)->len;i++) { + VALUE str = RARRAY(rb_load_path)->ptr[i]; + + FilePathValue(str); + if (RSTRING(str)->len == 0) continue; + path = RSTRING(str)->ptr; + for (j=0; ext[j]; j++) { + fname = rb_str_dup(*filep); + rb_str_cat2(fname, ext[j]); + OBJ_FREEZE(fname); + found = dln_find_file(StringValueCStr(fname), path); + if (found 
&& file_load_ok(found)) { + *filep = rb_str_new2(found); + return j+1; + } + } + } + return 0; +} + +VALUE +rb_find_file(path) + VALUE path; +{ + VALUE tmp; + char *f = StringValueCStr(path); + char *lpath; + + if (f[0] == '~') { + path = rb_file_expand_path(path, Qnil); + if (rb_safe_level() >= 1 && OBJ_TAINTED(path)) { + rb_raise(rb_eSecurityError, "loading from unsafe path %s", f); + } + OBJ_FREEZE(path); + f = StringValueCStr(path); + } + +#if defined(__MACOS__) || defined(riscos) + if (is_macos_native_path(f)) { + if (rb_safe_level() >= 1 && !rb_path_check(f)) { + rb_raise(rb_eSecurityError, "loading from unsafe file %s", f); + } + if (file_load_ok(f)) return path; + } +#endif + + if (is_absolute_path(f)) { + if (rb_safe_level() >= 1 && !rb_path_check(f)) { + rb_raise(rb_eSecurityError, "loading from unsafe file %s", f); + } + if (file_load_ok(f)) return path; + } + + if (rb_safe_level() >= 4) { + rb_raise(rb_eSecurityError, "loading from non-absolute path %s", f); + } + + if (rb_load_path) { + long i; + + Check_Type(rb_load_path, T_ARRAY); + tmp = rb_ary_new(); + for (i=0;i<RARRAY(rb_load_path)->len;i++) { + VALUE str = RARRAY(rb_load_path)->ptr[i]; + FilePathValue(str); + if (RSTRING(str)->len > 0) { + rb_ary_push(tmp, str); + } + } + tmp = rb_ary_join(tmp, rb_str_new2(PATH_SEP)); + if (RSTRING(tmp)->len == 0) { + lpath = 0; + } + else { + lpath = RSTRING(tmp)->ptr; + if (rb_safe_level() >= 1 && !rb_path_check(lpath)) { + rb_raise(rb_eSecurityError, "loading from unsafe path %s", lpath); + } + } + } + else { + lpath = 0; + } + + if (!lpath) { + return 0; /* no path, no load */ + } + f = dln_find_file(f, lpath); + if (rb_safe_level() >= 1 && !rb_path_check(f)) { + rb_raise(rb_eSecurityError, "loading from unsafe file %s", f); + } + if (file_load_ok(f)) { + tmp = rb_str_new2(f); + OBJ_FREEZE(tmp); + return tmp; + } + return 0; +} + +static void +define_filetest_function(name, func, argc) + const char *name; + VALUE (*func)(); + int argc; +{ + 
rb_define_module_function(rb_mFileTest, name, func, argc); + rb_define_singleton_method(rb_cFile, name, func, argc); +} + + +/* + * A <code>File</code> is an abstraction of any file object accessible + * by the program and is closely associated with class <code>IO</code> + * <code>File</code> includes the methods of module + * <code>FileTest</code> as class methods, allowing you to write (for + * example) <code>File.exist?("foo")</code>. + * + * In the description of File methods, + * <em>permission bits</em> are a platform-specific + * set of bits that indicate permissions of a file. On Unix-based + * systems, permissions are viewed as a set of three octets, for the + * owner, the group, and the rest of the world. For each of these + * entities, permissions may be set to read, write, or execute the + * file: + * + * The permission bits <code>0644</code> (in octal) would thus be + * interpreted as read/write for owner, and read-only for group and + * other. Higher-order bits may also be used to indicate the type of + * file (plain, directory, pipe, socket, and so on) and various other + * special features. If the permissions are for a directory, the + * meaning of the execute bit changes; when set the directory can be + * searched. + * + * On non-Posix operating systems, there may be only the ability to + * make a file read-only or read-write. In this case, the remaining + * permission bits will be synthesized to resemble typical values. For + * instance, on Windows NT the default permission bits are + * <code>0644</code>, which means read/write for owner, read-only for + * all others. The only change that can be made is to make the file + * read-only, which is reported as <code>0444</code>. 
+ */ + +void +Init_File() +{ + rb_mFileTest = rb_define_module("FileTest"); + rb_cFile = rb_define_class("File", rb_cIO); + + define_filetest_function("directory?", test_d, 1); + define_filetest_function("exist?", test_e, 1); + define_filetest_function("exists?", test_e, 1); /* temporary */ + define_filetest_function("readable?", test_r, 1); + define_filetest_function("readable_real?", test_R, 1); + define_filetest_function("world_readable?", test_wr, 1); + define_filetest_function("writable?", test_w, 1); + define_filetest_function("writable_real?", test_W, 1); + define_filetest_function("world_writable?", test_ww, 1); + define_filetest_function("executable?", test_x, 1); + define_filetest_function("executable_real?", test_X, 1); + define_filetest_function("file?", test_f, 1); + define_filetest_function("zero?", test_z, 1); + define_filetest_function("size?", test_s, 1); + define_filetest_function("size", rb_file_s_size, 1); + define_filetest_function("owned?", test_owned, 1); + define_filetest_function("grpowned?", test_grpowned, 1); + + define_filetest_function("pipe?", test_p, 1); + define_filetest_function("symlink?", test_l, 1); + define_filetest_function("socket?", test_S, 1); + + define_filetest_function("blockdev?", test_b, 1); + define_filetest_function("chardev?", test_c, 1); + + define_filetest_function("setuid?", test_suid, 1); + define_filetest_function("setgid?", test_sgid, 1); + define_filetest_function("sticky?", test_sticky, 1); + + rb_define_singleton_method(rb_cFile, "stat", rb_file_s_stat, 1); + rb_define_singleton_method(rb_cFile, "lstat", rb_file_s_lstat, 1); + rb_define_singleton_method(rb_cFile, "ftype", rb_file_s_ftype, 1); + + rb_define_singleton_method(rb_cFile, "atime", rb_file_s_atime, 1); + rb_define_singleton_method(rb_cFile, "mtime", rb_file_s_mtime, 1); + rb_define_singleton_method(rb_cFile, "ctime", rb_file_s_ctime, 1); + + rb_define_singleton_method(rb_cFile, "utime", rb_file_s_utime, -1); + rb_define_singleton_method(rb_cFile, 
"chmod", rb_file_s_chmod, -1); + rb_define_singleton_method(rb_cFile, "chown", rb_file_s_chown, -1); + rb_define_singleton_method(rb_cFile, "lchmod", rb_file_s_lchmod, -1); + rb_define_singleton_method(rb_cFile, "lchown", rb_file_s_lchown, -1); + + rb_define_singleton_method(rb_cFile, "link", rb_file_s_link, 2); + rb_define_singleton_method(rb_cFile, "symlink", rb_file_s_symlink, 2); + rb_define_singleton_method(rb_cFile, "readlink", rb_file_s_readlink, 1); + + rb_define_singleton_method(rb_cFile, "unlink", rb_file_s_unlink, -2); + rb_define_singleton_method(rb_cFile, "delete", rb_file_s_unlink, -2); + rb_define_singleton_method(rb_cFile, "rename", rb_file_s_rename, 2); + rb_define_singleton_method(rb_cFile, "umask", rb_file_s_umask, -1); + rb_define_singleton_method(rb_cFile, "truncate", rb_file_s_truncate, 2); + rb_define_singleton_method(rb_cFile, "expand_path", rb_file_s_expand_path, -1); + rb_define_singleton_method(rb_cFile, "basename", rb_file_s_basename, -1); + rb_define_singleton_method(rb_cFile, "dirname", rb_file_s_dirname, 1); + rb_define_singleton_method(rb_cFile, "extname", rb_file_s_extname, 1); + rb_define_singleton_method(rb_cFile, "path", rb_file_s_path, 1); + + separator = rb_obj_freeze(rb_str_new2("/")); + rb_define_const(rb_cFile, "Separator", separator); + rb_define_const(rb_cFile, "SEPARATOR", separator); + rb_define_singleton_method(rb_cFile, "split", rb_file_s_split, 1); + rb_define_singleton_method(rb_cFile, "join", rb_file_s_join, -2); + +#ifdef DOSISH + rb_define_const(rb_cFile, "ALT_SEPARATOR", rb_obj_freeze(rb_str_new2("\\"))); +#else + rb_define_const(rb_cFile, "ALT_SEPARATOR", Qnil); +#endif + rb_define_const(rb_cFile, "PATH_SEPARATOR", rb_obj_freeze(rb_str_new2(PATH_SEP))); + + rb_define_method(rb_cIO, "stat", rb_io_stat, 0); /* this is IO's method */ + rb_define_method(rb_cFile, "lstat", rb_file_lstat, 0); + + rb_define_method(rb_cFile, "atime", rb_file_atime, 0); + rb_define_method(rb_cFile, "mtime", rb_file_mtime, 0); + 
rb_define_method(rb_cFile, "ctime", rb_file_ctime, 0); + + rb_define_method(rb_cFile, "chmod", rb_file_chmod, 1); + rb_define_method(rb_cFile, "chown", rb_file_chown, 2); + rb_define_method(rb_cFile, "truncate", rb_file_truncate, 1); + + rb_define_method(rb_cFile, "flock", rb_file_flock, 1); + + rb_mFConst = rb_define_module_under(rb_cFile, "Constants"); + rb_include_module(rb_cIO, rb_mFConst); + rb_file_const("LOCK_SH", INT2FIX(LOCK_SH)); + rb_file_const("LOCK_EX", INT2FIX(LOCK_EX)); + rb_file_const("LOCK_UN", INT2FIX(LOCK_UN)); + rb_file_const("LOCK_NB", INT2FIX(LOCK_NB)); + + rb_define_method(rb_cFile, "path", rb_file_path, 0); + rb_define_global_function("test", rb_f_test, -1); + + rb_cStat = rb_define_class_under(rb_cFile, "Stat", rb_cObject); + rb_define_alloc_func(rb_cStat, rb_stat_s_alloc); + rb_define_method(rb_cStat, "initialize", rb_stat_init, 1); + rb_define_method(rb_cStat, "initialize_copy", rb_stat_init_copy, 1); + + rb_include_module(rb_cStat, rb_mComparable); + + rb_define_method(rb_cStat, "<=>", rb_stat_cmp, 1); + + rb_define_method(rb_cStat, "dev", rb_stat_dev, 0); + rb_define_method(rb_cStat, "dev_major", rb_stat_dev_major, 0); + rb_define_method(rb_cStat, "dev_minor", rb_stat_dev_minor, 0); + rb_define_method(rb_cStat, "ino", rb_stat_ino, 0); + rb_define_method(rb_cStat, "mode", rb_stat_mode, 0); + rb_define_method(rb_cStat, "nlink", rb_stat_nlink, 0); + rb_define_method(rb_cStat, "uid", rb_stat_uid, 0); + rb_define_method(rb_cStat, "gid", rb_stat_gid, 0); + rb_define_method(rb_cStat, "rdev", rb_stat_rdev, 0); + rb_define_method(rb_cStat, "rdev_major", rb_stat_rdev_major, 0); + rb_define_method(rb_cStat, "rdev_minor", rb_stat_rdev_minor, 0); + rb_define_method(rb_cStat, "size", rb_stat_size, 0); + rb_define_method(rb_cStat, "blksize", rb_stat_blksize, 0); + rb_define_method(rb_cStat, "blocks", rb_stat_blocks, 0); + rb_define_method(rb_cStat, "atime", rb_stat_atime, 0); + rb_define_method(rb_cStat, "mtime", rb_stat_mtime, 0); + 
rb_define_method(rb_cStat, "ctime", rb_stat_ctime, 0); + + rb_define_method(rb_cStat, "inspect", rb_stat_inspect, 0); + + rb_define_method(rb_cStat, "ftype", rb_stat_ftype, 0); + + rb_define_method(rb_cStat, "directory?", rb_stat_d, 0); + rb_define_method(rb_cStat, "readable?", rb_stat_r, 0); + rb_define_method(rb_cStat, "readable_real?", rb_stat_R, 0); + rb_define_method(rb_cStat, "world_readable?", rb_stat_wr, 0); + rb_define_method(rb_cStat, "writable?", rb_stat_w, 0); + rb_define_method(rb_cStat, "writable_real?", rb_stat_W, 0); + rb_define_method(rb_cStat, "world_writable?", rb_stat_ww, 0); + rb_define_method(rb_cStat, "executable?", rb_stat_x, 0); + rb_define_method(rb_cStat, "executable_real?", rb_stat_X, 0); + rb_define_method(rb_cStat, "file?", rb_stat_f, 0); + rb_define_method(rb_cStat, "zero?", rb_stat_z, 0); + rb_define_method(rb_cStat, "size?", rb_stat_s, 0); + rb_define_method(rb_cStat, "owned?", rb_stat_owned, 0); + rb_define_method(rb_cStat, "grpowned?", rb_stat_grpowned, 0); + + rb_define_method(rb_cStat, "pipe?", rb_stat_p, 0); + rb_define_method(rb_cStat, "symlink?", rb_stat_l, 0); + rb_define_method(rb_cStat, "socket?", rb_stat_S, 0); + + rb_define_method(rb_cStat, "blockdev?", rb_stat_b, 0); + rb_define_method(rb_cStat, "chardev?", rb_stat_c, 0); + + rb_define_method(rb_cStat, "setuid?", rb_stat_suid, 0); + rb_define_method(rb_cStat, "setgid?", rb_stat_sgid, 0); + rb_define_method(rb_cStat, "sticky?", rb_stat_sticky, 0); +} +/********************************************************************** + + gc.c - + + $Author: nobu $ + $Date: 2005/04/30 02:59:41 $ + created at: Tue Oct 5 09:44:46 JST 1993 + + Copyright (C) 1993-2003 Yukihiro Matsumoto + Copyright (C) 2000 Network Applied Communication Laboratory, Inc. 
+ Copyright (C) 2000 Information-technology Promotion Agency, Japan + +**********************************************************************/ + +#include "ruby.h" +#include "rubysig.h" +#include "st.h" +#include "node.h" +#include "env.h" +#include "re.h" +#include <stdio.h> +#include <setjmp.h> +#include <sys/types.h> + +#ifdef HAVE_SYS_TIME_H +#include <sys/time.h> +#endif + +#ifdef HAVE_SYS_RESOURCE_H +#include <sys/resource.h> +#endif + +#ifdef __ia64__ +#include <ucontext.h> +#if defined(__FreeBSD__) +/* + * FreeBSD/ia64 currently does not have a way for a process to get the + * base address for the RSE backing store, so hardcode it. + */ +#define __libc_ia64_register_backing_store_base (4ULL<<61) +#else +#ifdef HAVE_UNWIND_H +#include <unwind.h> +#else +#pragma weak __libc_ia64_register_backing_store_base +extern unsigned long __libc_ia64_register_backing_store_base; +#endif +#endif +#endif + +#if defined _WIN32 || defined __CYGWIN__ +#include <windows.h> +#endif + +int rb_io_fptr_finalize _((struct OpenFile*)); + +#if !defined(setjmp) && defined(HAVE__SETJMP) +#define setjmp(env) _setjmp(env) +#endif + +/* Make alloca work the best possible way. 
*/ +#ifdef __GNUC__ +# ifndef atarist +# ifndef alloca +# define alloca __builtin_alloca +# endif +# endif /* atarist */ +#else +# ifdef HAVE_ALLOCA_H +# include <alloca.h> +# else +# ifdef _AIX + #pragma alloca +# else +# ifndef alloca /* predefined by HP cc +Olibcalls */ +void *alloca (); +# endif +# endif /* AIX */ +# endif /* HAVE_ALLOCA_H */ +#endif /* __GNUC__ */ + +#ifndef GC_MALLOC_LIMIT +#if defined(MSDOS) || defined(__human68k__) +#define GC_MALLOC_LIMIT 200000 +#else +#define GC_MALLOC_LIMIT 8000000 +#endif +#endif + +static unsigned long malloc_increase = 0; +static unsigned long malloc_limit = GC_MALLOC_LIMIT; +static void run_final(); +static VALUE nomem_error; +static void garbage_collect(); + +void +rb_memerror() +{ + static int recurse = 0; + + if (recurse > 0 && rb_safe_level() < 4) { + fprintf(stderr, "[FATAL] failed to allocate memory\n"); + exit(1); + } + recurse++; + rb_exc_raise(nomem_error); +} + +void * +ruby_xmalloc(size) + long size; +{ + void *mem; + + if (size < 0) { + rb_raise(rb_eNoMemError, "negative allocation size (or too big)"); + } + if (size == 0) size = 1; + malloc_increase += size; + + if (malloc_increase > malloc_limit) { + garbage_collect(); + } + RUBY_CRITICAL(mem = malloc(size)); + if (!mem) { + garbage_collect(); + RUBY_CRITICAL(mem = malloc(size)); + if (!mem) { + rb_memerror(); + } + } + + return mem; +} + +void * +ruby_xcalloc(n, size) + long n, size; +{ + void *mem; + + mem = xmalloc(n * size); + memset(mem, 0, n * size); + + return mem; +} + +void * +ruby_xrealloc(ptr, size) + void *ptr; + long size; +{ + void *mem; + + if (size < 0) { + rb_raise(rb_eArgError, "negative re-allocation size"); + } + if (!ptr) return xmalloc(size); + if (size == 0) size = 1; + malloc_increase += size; + RUBY_CRITICAL(mem = realloc(ptr, size)); + if (!mem) { + garbage_collect(); + RUBY_CRITICAL(mem = realloc(ptr, size)); + if (!mem) { + rb_memerror(); + } + } + + return mem; +} + +void +ruby_xfree(x) + void *x; +{ + if (x) + 
RUBY_CRITICAL(free(x)); +} + +static int dont_gc; +static int during_gc; +static int need_call_final = 0; +static st_table *finalizer_table = 0; + + +/* + * call-seq: + * GC.enable => true or false + * + * Enables garbage collection, returning <code>true</code> if garbage + * collection was previously disabled. + * + * GC.disable #=> false + * GC.enable #=> true + * GC.enable #=> false + * + */ + +VALUE +rb_gc_enable() +{ + int old = dont_gc; + + dont_gc = Qfalse; + return old; +} + +/* + * call-seq: + * GC.disable => true or false + * + * Disables garbage collection, returning <code>true</code> if garbage + * collection was already disabled. + * + * GC.disable #=> false + * GC.disable #=> true + * + */ + +VALUE +rb_gc_disable() +{ + int old = dont_gc; + + dont_gc = Qtrue; + return old; +} + +VALUE rb_mGC; + +static struct gc_list { + VALUE *varptr; + struct gc_list *next; +} *global_List = 0; + +void +rb_gc_register_address(addr) + VALUE *addr; +{ + struct gc_list *tmp; + + tmp = ALLOC(struct gc_list); + tmp->next = global_List; + tmp->varptr = addr; + global_List = tmp; +} + +void +rb_gc_unregister_address(addr) + VALUE *addr; +{ + struct gc_list *tmp = global_List; + + if (tmp->varptr == addr) { + global_List = tmp->next; + RUBY_CRITICAL(free(tmp)); + return; + } + while (tmp->next) { + if (tmp->next->varptr == addr) { + struct gc_list *t = tmp->next; + + tmp->next = tmp->next->next; + RUBY_CRITICAL(free(t)); + break; + } + tmp = tmp->next; + } +} + +#undef GC_DEBUG + +void +rb_global_variable(var) + VALUE *var; +{ + rb_gc_register_address(var); +} + +typedef struct RVALUE { + union { + struct { + unsigned long flags; /* always 0 for freed obj */ + struct RVALUE *next; + } free; + struct RBasic basic; + struct RObject object; + struct RClass klass; + struct RFloat flonum; + struct RString string; + struct RArray array; + struct RRegexp regexp; + struct RHash hash; + struct RData data; + struct RStruct rstruct; + struct RBignum bignum; + struct RFile file; + 
struct RNode node; + struct RMatch match; + struct RVarmap varmap; + struct SCOPE scope; + } as; +#ifdef GC_DEBUG + char *file; + int line; +#endif +} RVALUE; + +static RVALUE *freelist = 0; +static RVALUE *deferred_final_list = 0; + +#define HEAPS_INCREMENT 10 +static struct heaps_slot { + RVALUE *slot; + int limit; +} *heaps; +static int heaps_length = 0; +static int heaps_used = 0; + +#define HEAP_MIN_SLOTS 10000 +static int heap_slots = HEAP_MIN_SLOTS; + +#define FREE_MIN 4096 + +static RVALUE *himem, *lomem; + +static void +add_heap() +{ + RVALUE *p, *pend; + + if (heaps_used == heaps_length) { + /* Realloc heaps */ + struct heaps_slot *p; + int length; + + heaps_length += HEAPS_INCREMENT; + length = heaps_length*sizeof(struct heaps_slot); + RUBY_CRITICAL( + if (heaps_used > 0) { + p = (struct heaps_slot *)realloc(heaps, length); + if (p) heaps = p; + } + else { + p = heaps = (struct heaps_slot *)malloc(length); + }); + if (p == 0) rb_memerror(); + } + + for (;;) { + RUBY_CRITICAL(p = heaps[heaps_used].slot = (RVALUE*)malloc(sizeof(RVALUE)*heap_slots)); + heaps[heaps_used].limit = heap_slots; + if (p == 0) { + if (heap_slots == HEAP_MIN_SLOTS) { + rb_memerror(); + } + heap_slots = HEAP_MIN_SLOTS; + continue; + } + break; + } + pend = p + heap_slots; + if (lomem == 0 || lomem > p) lomem = p; + if (himem < pend) himem = pend; + heaps_used++; + heap_slots *= 1.8; + + while (p < pend) { + p->as.free.flags = 0; + p->as.free.next = freelist; + freelist = p; + p++; + } +} +#define RANY(o) ((RVALUE*)(o)) + +VALUE +rb_newobj() +{ + VALUE obj; + + if (!freelist) garbage_collect(); + + obj = (VALUE)freelist; + freelist = freelist->as.free.next; + MEMZERO((void*)obj, RVALUE, 1); +#ifdef GC_DEBUG + RANY(obj)->file = ruby_sourcefile; + RANY(obj)->line = ruby_sourceline; +#endif + return obj; +} + +VALUE +rb_data_object_alloc(klass, datap, dmark, dfree) + VALUE klass; + void *datap; + RUBY_DATA_FUNC dmark; + RUBY_DATA_FUNC dfree; +{ + NEWOBJ(data, struct RData); + if (klass) 
Check_Type(klass, T_CLASS); + OBJSETUP(data, klass, T_DATA); + data->data = datap; + data->dfree = dfree; + data->dmark = dmark; + + return (VALUE)data; +} + +extern st_table *rb_class_tbl; +VALUE *rb_gc_stack_start = 0; + +#ifdef DJGPP +/* set stack size (http://www.delorie.com/djgpp/v2faq/faq15_9.html) */ +unsigned int _stklen = 0x180000; /* 1.5 MB */ +#endif + +#if defined(DJGPP) || defined(_WIN32_WCE) +static unsigned int STACK_LEVEL_MAX = 65535; +#elif defined(__human68k__) +unsigned int _stacksize = 262144; +# define STACK_LEVEL_MAX (_stacksize - 4096) +# undef HAVE_GETRLIMIT +#elif defined(HAVE_GETRLIMIT) +static unsigned int STACK_LEVEL_MAX = 655300; +#else +# define STACK_LEVEL_MAX 655300 +#endif + +NOINLINE(static void set_stack_end _((VALUE **stack_end_p))); + +static void +set_stack_end(VALUE **stack_end_p) +{ + VALUE stack_end; + *stack_end_p = &stack_end; +} +#define SET_STACK_END VALUE *stack_end; set_stack_end(&stack_end) +#define STACK_END (stack_end) + +#if defined(sparc) || defined(__sparc__) +# define STACK_LENGTH (rb_gc_stack_start - STACK_END + 0x80) +#elif STACK_GROW_DIRECTION < 0 +# define STACK_LENGTH (rb_gc_stack_start - STACK_END) +#elif STACK_GROW_DIRECTION > 0 +# define STACK_LENGTH (STACK_END - rb_gc_stack_start + 1) +#else +# define STACK_LENGTH ((STACK_END < rb_gc_stack_start) ? rb_gc_stack_start - STACK_END\ + : STACK_END - rb_gc_stack_start + 1) +#endif +#if STACK_GROW_DIRECTION > 0 +# define STACK_UPPER(x, a, b) a +#elif STACK_GROW_DIRECTION < 0 +# define STACK_UPPER(x, a, b) b +#else +static int grow_direction; +static int +stack_grow_direction(addr) + VALUE *addr; +{ + SET_STACK_END; + + if (STACK_END > addr) return grow_direction = 1; + return grow_direction = -1; +} +# define stack_growup_p(x) ((grow_direction ? grow_direction : stack_grow_direction(x)) > 0) +# define STACK_UPPER(x, a, b) (stack_growup_p(x) ? 
a : b) +#endif + +#define GC_WATER_MARK 512 + +#define CHECK_STACK(ret) do {\ + SET_STACK_END;\ + (ret) = (STACK_LENGTH > STACK_LEVEL_MAX + GC_WATER_MARK);\ +} while (0) + +int +ruby_stack_length(p) + VALUE **p; +{ + SET_STACK_END; + if (p) *p = STACK_UPPER(STACK_END, rb_gc_stack_start, STACK_END); + return STACK_LENGTH; +} + +int +ruby_stack_check() +{ + int ret; + + CHECK_STACK(ret); + return ret; +} + +#define MARK_STACK_MAX 1024 +static VALUE mark_stack[MARK_STACK_MAX]; +static VALUE *mark_stack_ptr; +static int mark_stack_overflow; + +static void +init_mark_stack() +{ + mark_stack_overflow = 0; + mark_stack_ptr = mark_stack; +} + +#define MARK_STACK_EMPTY (mark_stack_ptr == mark_stack) + +static st_table *source_filenames; + +char * +rb_source_filename(f) + const char *f; +{ + char *name; + + if (!st_lookup(source_filenames, (st_data_t)f, (st_data_t *)&name)) { + long len = strlen(f) + 1; + char *ptr = name = ALLOC_N(char, len + 1); + *ptr++ = 0; + MEMCPY(ptr, f, char, len); + st_add_direct(source_filenames, (st_data_t)ptr, (st_data_t)name); + return ptr; + } + return name + 1; +} + +static void +mark_source_filename(f) + char *f; +{ + if (f) { + f[-1] = 1; + } +} + +static int +sweep_source_filename(key, value) + char *key, *value; +{ + if (*value) { + *value = 0; + return ST_CONTINUE; + } + else { + free(value); + return ST_DELETE; + } +} + +static void gc_mark _((VALUE ptr, int lev)); +static void gc_mark_children _((VALUE ptr, int lev)); + +static void +gc_mark_all() +{ + RVALUE *p, *pend; + int i; + + init_mark_stack(); + for (i = 0; i < heaps_used; i++) { + p = heaps[i].slot; pend = p + heaps[i].limit; + while (p < pend) { + if ((p->as.basic.flags & FL_MARK) && + (p->as.basic.flags != FL_MARK)) { + gc_mark_children((VALUE)p, 0); + } + p++; + } + } +} + +static void +gc_mark_rest() +{ + VALUE tmp_arry[MARK_STACK_MAX]; + VALUE *p; + + p = (mark_stack_ptr - mark_stack) + tmp_arry; + MEMCPY(tmp_arry, mark_stack, VALUE, MARK_STACK_MAX); + + init_mark_stack(); 
+ while(p != tmp_arry){ + p--; + gc_mark_children(*p, 0); + } +} + +static inline int +is_pointer_to_heap(ptr) + void *ptr; +{ + register RVALUE *p = RANY(ptr); + register RVALUE *heap_org; + register long i; + + if (p < lomem || p > himem) return Qfalse; + + /* check if p looks like a pointer */ + for (i=0; i < heaps_used; i++) { + heap_org = heaps[i].slot; + if (heap_org <= p && p < heap_org + heaps[i].limit && + ((((char*)p)-((char*)heap_org))%sizeof(RVALUE)) == 0) + return Qtrue; + } + return Qfalse; +} + +static void +mark_locations_array(x, n) + register VALUE *x; + register long n; +{ + VALUE v; + while (n--) { + v = *x; + if (is_pointer_to_heap((void *)v)) { + gc_mark(v, 0); + } + x++; + } +} + +void +rb_gc_mark_locations(start, end) + VALUE *start, *end; +{ + long n; + + n = end - start; + mark_locations_array(start,n); +} + +static int +mark_entry(key, value, lev) + ID key; + VALUE value; + int lev; +{ + gc_mark(value, lev); + return ST_CONTINUE; +} + +void +mark_tbl(tbl, lev) + st_table *tbl; + int lev; +{ + if (!tbl) return; + st_foreach(tbl, mark_entry, lev); +} + +void +rb_mark_tbl(tbl) + st_table *tbl; +{ + mark_tbl(tbl, 0); +} + +static int +mark_keyvalue(key, value, lev) + VALUE key; + VALUE value; + int lev; +{ + gc_mark(key, lev); + gc_mark(value, lev); + return ST_CONTINUE; +} + +void +mark_hash(tbl, lev) + st_table *tbl; + int lev; +{ + if (!tbl) return; + st_foreach(tbl, mark_keyvalue, lev); +} + +void +rb_mark_hash(tbl) + st_table *tbl; +{ + mark_hash(tbl, 0); +} + +void +rb_gc_mark_maybe(obj) + VALUE obj; +{ + if (is_pointer_to_heap((void *)obj)) { + gc_mark(obj, 0); + } +} + +#define GC_LEVEL_MAX 250 + +void +gc_mark(ptr, lev) + VALUE ptr; + int lev; +{ + register RVALUE *obj; + + obj = RANY(ptr); + if (rb_special_const_p(ptr)) return; /* special const not marked */ + if (obj->as.basic.flags == 0) return; /* free cell */ + if (obj->as.basic.flags & FL_MARK) return; /* already marked */ + obj->as.basic.flags |= FL_MARK; + + if (lev > 
GC_LEVEL_MAX || (lev == 0 && ruby_stack_check())) { + if (!mark_stack_overflow) { + if (mark_stack_ptr - mark_stack < MARK_STACK_MAX) { + *mark_stack_ptr = ptr; + mark_stack_ptr++; + } + else { + mark_stack_overflow = 1; + } + } + return; + } + gc_mark_children(ptr, lev+1); +} + +void +rb_gc_mark(ptr) + VALUE ptr; +{ + gc_mark(ptr, 0); +} + +static void +gc_mark_children(ptr, lev) + VALUE ptr; + int lev; +{ + register RVALUE *obj = RANY(ptr); + + goto marking; /* skip */ + + again: + obj = RANY(ptr); + if (rb_special_const_p(ptr)) return; /* special const not marked */ + if (obj->as.basic.flags == 0) return; /* free cell */ + if (obj->as.basic.flags & FL_MARK) return; /* already marked */ + obj->as.basic.flags |= FL_MARK; + + marking: + if (FL_TEST(obj, FL_EXIVAR)) { + rb_mark_generic_ivar(ptr); + } + + switch (obj->as.basic.flags & T_MASK) { + case T_NIL: + case T_FIXNUM: + rb_bug("rb_gc_mark() called for broken object"); + break; + + case T_NODE: + mark_source_filename(obj->as.node.nd_file); + switch (nd_type(obj)) { + case NODE_IF: /* 1,2,3 */ + case NODE_FOR: + case NODE_ITER: + case NODE_CREF: + case NODE_WHEN: + case NODE_MASGN: + case NODE_RESCUE: + case NODE_RESBODY: + case NODE_CLASS: + gc_mark((VALUE)obj->as.node.u2.node, lev); + /* fall through */ + case NODE_BLOCK: /* 1,3 */ + case NODE_ARRAY: + case NODE_DSTR: + case NODE_DXSTR: + case NODE_DREGX: + case NODE_DREGX_ONCE: + case NODE_FBODY: + case NODE_ENSURE: + case NODE_CALL: + case NODE_DEFS: + case NODE_OP_ASGN1: + gc_mark((VALUE)obj->as.node.u1.node, lev); + /* fall through */ + case NODE_SUPER: /* 3 */ + case NODE_FCALL: + case NODE_DEFN: + ptr = (VALUE)obj->as.node.u3.node; + goto again; + + case NODE_WHILE: /* 1,2 */ + case NODE_UNTIL: + case NODE_AND: + case NODE_OR: + case NODE_CASE: + case NODE_SCLASS: + case NODE_DOT2: + case NODE_DOT3: + case NODE_FLIP2: + case NODE_FLIP3: + case NODE_MATCH2: + case NODE_MATCH3: + case NODE_OP_ASGN_OR: + case NODE_OP_ASGN_AND: + case NODE_MODULE: + case 
NODE_ALIAS: + case NODE_VALIAS: + gc_mark((VALUE)obj->as.node.u1.node, lev); + /* fall through */ + case NODE_METHOD: /* 2 */ + case NODE_NOT: + case NODE_GASGN: + case NODE_LASGN: + case NODE_DASGN: + case NODE_DASGN_CURR: + case NODE_IASGN: + case NODE_CVDECL: + case NODE_CVASGN: + case NODE_COLON3: + case NODE_OPT_N: + case NODE_EVSTR: + case NODE_UNDEF: + ptr = (VALUE)obj->as.node.u2.node; + goto again; + + case NODE_HASH: /* 1 */ + case NODE_LIT: + case NODE_STR: + case NODE_XSTR: + case NODE_DEFINED: + case NODE_MATCH: + case NODE_RETURN: + case NODE_BREAK: + case NODE_NEXT: + case NODE_YIELD: + case NODE_COLON2: + case NODE_ARGS: + case NODE_SPLAT: + case NODE_TO_ARY: + case NODE_SVALUE: + ptr = (VALUE)obj->as.node.u1.node; + goto again; + + case NODE_SCOPE: /* 2,3 */ + case NODE_BLOCK_PASS: + case NODE_CDECL: + gc_mark((VALUE)obj->as.node.u3.node, lev); + ptr = (VALUE)obj->as.node.u2.node; + goto again; + + case NODE_ZARRAY: /* - */ + case NODE_ZSUPER: + case NODE_CFUNC: + case NODE_VCALL: + case NODE_GVAR: + case NODE_LVAR: + case NODE_DVAR: + case NODE_IVAR: + case NODE_CVAR: + case NODE_NTH_REF: + case NODE_BACK_REF: + case NODE_REDO: + case NODE_RETRY: + case NODE_SELF: + case NODE_NIL: + case NODE_TRUE: + case NODE_FALSE: + case NODE_ERRINFO: + case NODE_ATTRSET: + case NODE_BLOCK_ARG: + case NODE_POSTEXE: + break; +#ifdef C_ALLOCA + case NODE_ALLOCA: + mark_locations_array((VALUE*)obj->as.node.u1.value, + obj->as.node.u3.cnt); + ptr = (VALUE)obj->as.node.u2.node; + goto again; +#endif + + default: /* unlisted NODE */ + if (is_pointer_to_heap(obj->as.node.u1.node)) { + gc_mark((VALUE)obj->as.node.u1.node, lev); + } + if (is_pointer_to_heap(obj->as.node.u2.node)) { + gc_mark((VALUE)obj->as.node.u2.node, lev); + } + if (is_pointer_to_heap(obj->as.node.u3.node)) { + gc_mark((VALUE)obj->as.node.u3.node, lev); + } + } + return; /* no need to mark class. 
*/ + } + + gc_mark(obj->as.basic.klass, lev); + switch (obj->as.basic.flags & T_MASK) { + case T_ICLASS: + case T_CLASS: + case T_MODULE: + mark_tbl(obj->as.klass.m_tbl, lev); + mark_tbl(obj->as.klass.iv_tbl, lev); + ptr = obj->as.klass.super; + goto again; + + case T_ARRAY: + if (FL_TEST(obj, ELTS_SHARED)) { + ptr = obj->as.array.aux.shared; + goto again; + } + else { + long i, len = obj->as.array.len; + VALUE *ptr = obj->as.array.ptr; + + for (i=0; i < len; i++) { + gc_mark(*ptr++, lev); + } + } + break; + + case T_HASH: + mark_hash(obj->as.hash.tbl, lev); + ptr = obj->as.hash.ifnone; + goto again; + + case T_STRING: +#define STR_ASSOC FL_USER3 /* copied from string.c */ + if (FL_TEST(obj, ELTS_SHARED|STR_ASSOC)) { + ptr = obj->as.string.aux.shared; + goto again; + } + break; + + case T_DATA: + if (obj->as.data.dmark) (*obj->as.data.dmark)(DATA_PTR(obj)); + break; + + case T_OBJECT: + mark_tbl(obj->as.object.iv_tbl, lev); + break; + + case T_FILE: + case T_REGEXP: + case T_FLOAT: + case T_BIGNUM: + case T_BLOCK: + break; + + case T_MATCH: + if (obj->as.match.str) { + ptr = obj->as.match.str; + goto again; + } + break; + + case T_VARMAP: + gc_mark(obj->as.varmap.val, lev); + ptr = (VALUE)obj->as.varmap.next; + goto again; + + case T_SCOPE: + if (obj->as.scope.local_vars && (obj->as.scope.flags & SCOPE_MALLOC)) { + int n = obj->as.scope.local_tbl[0]+1; + VALUE *vars = &obj->as.scope.local_vars[-1]; + + while (n--) { + gc_mark(*vars++, lev); + } + } + break; + + case T_STRUCT: + { + long len = obj->as.rstruct.len; + VALUE *ptr = obj->as.rstruct.ptr; + + while (len--) { + gc_mark(*ptr++, lev); + } + } + break; + + default: + rb_bug("rb_gc_mark(): unknown data type 0x%lx(0x%lx) %s", + obj->as.basic.flags & T_MASK, obj, + is_pointer_to_heap(obj) ? 
"corrupted object" : "non object"); + } +} + +static void obj_free _((VALUE)); + +static void +finalize_list(p) + RVALUE *p; +{ + while (p) { + RVALUE *tmp = p->as.free.next; + run_final((VALUE)p); + if (!FL_TEST(p, FL_SINGLETON)) { /* not freeing page */ + p->as.free.flags = 0; + p->as.free.next = freelist; + freelist = p; + } + p = tmp; + } +} + +static void +free_unused_heaps() +{ + int i, j; + + for (i = j = 1; j < heaps_used; i++) { + if (heaps[i].limit == 0) { + free(heaps[i].slot); + heaps_used--; + } + else { + if (i != j) { + heaps[j] = heaps[i]; + } + j++; + } + } +} + +static void +gc_sweep() +{ + RVALUE *p, *pend, *final_list; + int freed = 0; + int i; + unsigned long live = 0; + + mark_source_filename(ruby_sourcefile); + st_foreach(source_filenames, sweep_source_filename, 0); + + freelist = 0; + final_list = deferred_final_list; + deferred_final_list = 0; + for (i = 0; i < heaps_used; i++) { + int n = 0; + RVALUE *free = freelist; + RVALUE *final = final_list; + + p = heaps[i].slot; pend = p + heaps[i].limit; + while (p < pend) { + if (!(p->as.basic.flags & FL_MARK)) { + if (p->as.basic.flags) { + obj_free((VALUE)p); + } + if (need_call_final && FL_TEST(p, FL_FINALIZE)) { + p->as.free.flags = FL_MARK; /* remain marked */ + p->as.free.next = final_list; + final_list = p; + } + else { + p->as.free.flags = 0; + p->as.free.next = freelist; + freelist = p; + } + n++; + } + else if (RBASIC(p)->flags == FL_MARK) { + /* objects to be finalized */ + /* do notning remain marked */ + } + else { + RBASIC(p)->flags &= ~FL_MARK; + live++; + } + p++; + } + if (n == heaps[i].limit && freed > FREE_MIN) { + RVALUE *pp; + + heaps[i].limit = 0; + for (pp = final_list; pp != final; pp = pp->as.free.next) { + p->as.free.flags |= FL_SINGLETON; /* freeing page mark */ + } + freelist = free; /* cancel this page from freelist */ + } + else { + freed += n; + } + } + if (malloc_increase > malloc_limit) { + malloc_limit += (malloc_increase - malloc_limit) * (double)live / (live + 
freed); + if (malloc_limit < GC_MALLOC_LIMIT) malloc_limit = GC_MALLOC_LIMIT; + } + malloc_increase = 0; + if (freed < FREE_MIN) { + add_heap(); + } + during_gc = 0; + + /* clear finalization list */ + if (final_list) { + deferred_final_list = final_list; + return; + } + free_unused_heaps(); +} + +void +rb_gc_force_recycle(p) + VALUE p; +{ + RANY(p)->as.free.flags = 0; + RANY(p)->as.free.next = freelist; + freelist = RANY(p); +} + +static void +obj_free(obj) + VALUE obj; +{ + switch (RANY(obj)->as.basic.flags & T_MASK) { + case T_NIL: + case T_FIXNUM: + case T_TRUE: + case T_FALSE: + rb_bug("obj_free() called for broken object"); + break; + } + + if (FL_TEST(obj, FL_EXIVAR)) { + rb_free_generic_ivar((VALUE)obj); + } + + switch (RANY(obj)->as.basic.flags & T_MASK) { + case T_OBJECT: + if (RANY(obj)->as.object.iv_tbl) { + st_free_table(RANY(obj)->as.object.iv_tbl); + } + break; + case T_MODULE: + case T_CLASS: + rb_clear_cache_by_class((VALUE)obj); + st_free_table(RANY(obj)->as.klass.m_tbl); + if (RANY(obj)->as.object.iv_tbl) { + st_free_table(RANY(obj)->as.object.iv_tbl); + } + break; + case T_STRING: + if (RANY(obj)->as.string.ptr && !FL_TEST(obj, ELTS_SHARED)) { + RUBY_CRITICAL(free(RANY(obj)->as.string.ptr)); + } + break; + case T_ARRAY: + if (RANY(obj)->as.array.ptr && !FL_TEST(obj, ELTS_SHARED)) { + RUBY_CRITICAL(free(RANY(obj)->as.array.ptr)); + } + break; + case T_HASH: + if (RANY(obj)->as.hash.tbl) { + st_free_table(RANY(obj)->as.hash.tbl); + } + break; + case T_REGEXP: + if (RANY(obj)->as.regexp.ptr) { + onig_free(RANY(obj)->as.regexp.ptr); + } + if (RANY(obj)->as.regexp.str) { + RUBY_CRITICAL(free(RANY(obj)->as.regexp.str)); + } + break; + case T_DATA: + if (DATA_PTR(obj)) { + if ((long)RANY(obj)->as.data.dfree == -1) { + RUBY_CRITICAL(free(DATA_PTR(obj))); + } + else if (RANY(obj)->as.data.dfree) { + (*RANY(obj)->as.data.dfree)(DATA_PTR(obj)); + } + } + break; + case T_MATCH: + if (RANY(obj)->as.match.regs) { + onig_region_free(RANY(obj)->as.match.regs, 
0); + RUBY_CRITICAL(free(RANY(obj)->as.match.regs)); + } + break; + case T_FILE: + if (RANY(obj)->as.file.fptr) { + rb_io_fptr_finalize(RANY(obj)->as.file.fptr); + } + break; + case T_ICLASS: + /* iClass shares table with the module */ + break; + + case T_FLOAT: + case T_VARMAP: + case T_BLOCK: + break; + + case T_BIGNUM: + if (RANY(obj)->as.bignum.digits) { + RUBY_CRITICAL(free(RANY(obj)->as.bignum.digits)); + } + break; + case T_NODE: + switch (nd_type(obj)) { + case NODE_SCOPE: + if (RANY(obj)->as.node.u1.tbl) { + RUBY_CRITICAL(free(RANY(obj)->as.node.u1.tbl)); + } + break; +#ifdef C_ALLOCA + case NODE_ALLOCA: + RUBY_CRITICAL(free(RANY(obj)->as.node.u1.node)); + break; +#endif + } + return; /* no need to free iv_tbl */ + + case T_SCOPE: + if (RANY(obj)->as.scope.local_vars && + RANY(obj)->as.scope.flags != SCOPE_ALLOCA) { + VALUE *vars = RANY(obj)->as.scope.local_vars-1; + if (vars[0] == 0) + RUBY_CRITICAL(free(RANY(obj)->as.scope.local_tbl)); + if (RANY(obj)->as.scope.flags & SCOPE_MALLOC) + RUBY_CRITICAL(free(vars)); + } + break; + + case T_STRUCT: + if (RANY(obj)->as.rstruct.ptr) { + RUBY_CRITICAL(free(RANY(obj)->as.rstruct.ptr)); + } + break; + + default: + rb_bug("gc_sweep(): unknown data type 0x%lx(%ld)", obj, + RANY(obj)->as.basic.flags & T_MASK); + } +} + +void +rb_gc_mark_frame(frame) + struct FRAME *frame; +{ + gc_mark((VALUE)frame->node, 0); +} + +#ifdef __GNUC__ +#if defined(__human68k__) || defined(DJGPP) +#if defined(__human68k__) +typedef unsigned long rb_jmp_buf[8]; +__asm__ (".even\n\ +_rb_setjmp:\n\ + move.l 4(sp),a0\n\ + movem.l d3-d7/a3-a5,(a0)\n\ + moveq.l #0,d0\n\ + rts"); +#ifdef setjmp +#undef setjmp +#endif +#else +#if defined(DJGPP) +typedef unsigned long rb_jmp_buf[6]; +__asm__ (".align 4\n\ +_rb_setjmp:\n\ + pushl %ebp\n\ + movl %esp,%ebp\n\ + movl 8(%ebp),%ebp\n\ + movl %eax,(%ebp)\n\ + movl %ebx,4(%ebp)\n\ + movl %ecx,8(%ebp)\n\ + movl %edx,12(%ebp)\n\ + movl %esi,16(%ebp)\n\ + movl %edi,20(%ebp)\n\ + popl %ebp\n\ + xorl 
%eax,%eax\n\ + ret"); +#endif +#endif +int rb_setjmp (rb_jmp_buf); +#define jmp_buf rb_jmp_buf +#define setjmp rb_setjmp +#endif /* __human68k__ or DJGPP */ +#endif /* __GNUC__ */ + +static void +garbage_collect() +{ + struct gc_list *list; + struct FRAME * volatile frame; /* gcc 2.7.2.3 -O2 bug?? */ + jmp_buf save_regs_gc_mark; + SET_STACK_END; + +#ifdef HAVE_NATIVETHREAD + if (!is_ruby_native_thread()) { + rb_bug("cross-thread violation on rb_gc()"); + } +#endif + if (dont_gc || during_gc) { + if (!freelist) { + add_heap(); + } + return; + } + if (during_gc) return; + during_gc++; + + init_mark_stack(); + + /* mark frame stack */ + for (frame = ruby_frame; frame; frame = frame->prev) { + rb_gc_mark_frame(frame); + if (frame->tmp) { + struct FRAME *tmp = frame->tmp; + while (tmp) { + rb_gc_mark_frame(tmp); + tmp = tmp->prev; + } + } + } + gc_mark((VALUE)ruby_scope, 0); + gc_mark((VALUE)ruby_dyna_vars, 0); + if (finalizer_table) { + mark_tbl(finalizer_table, 0); + } + + FLUSH_REGISTER_WINDOWS; + /* This assumes that all registers are saved into the jmp_buf (and stack) */ + setjmp(save_regs_gc_mark); + mark_locations_array((VALUE*)save_regs_gc_mark, sizeof(save_regs_gc_mark) / sizeof(VALUE *)); +#if STACK_GROW_DIRECTION < 0 + rb_gc_mark_locations((VALUE*)STACK_END, rb_gc_stack_start); +#elif STACK_GROW_DIRECTION > 0 + rb_gc_mark_locations(rb_gc_stack_start, (VALUE*)STACK_END + 1); +#else + if ((VALUE*)STACK_END < rb_gc_stack_start) + rb_gc_mark_locations((VALUE*)STACK_END, rb_gc_stack_start); + else + rb_gc_mark_locations(rb_gc_stack_start, (VALUE*)STACK_END + 1); +#endif +#ifdef __ia64__ + /* mark backing store (flushed register window on the stack) */ + /* the basic idea from guile GC code */ + { + ucontext_t ctx; + VALUE *top, *bot; +#ifdef HAVE_UNWIND_H + _Unwind_Context *unwctx = _UNW_createContextForSelf(); +#endif + + getcontext(&ctx); + mark_locations_array((VALUE*)&ctx.uc_mcontext, + ((size_t)(sizeof(VALUE)-1 + sizeof ctx.uc_mcontext)/sizeof(VALUE))); 
+#ifdef HAVE_UNWIND_H + _UNW_currentContext(unwctx); + bot = (VALUE*)(long)_UNW_getAR(unwctx, _UNW_AR_BSP); + top = (VALUE*)(long)_UNW_getAR(unwctx, _UNW_AR_BSPSTORE); + _UNW_destroyContext(unwctx); +#else + bot = (VALUE*)__libc_ia64_register_backing_store_base; + top = (VALUE*)ctx.uc_mcontext.IA64_BSPSTORE; +#endif + rb_gc_mark_locations(bot, top); + } +#endif +#if defined(__human68k__) || defined(__mc68000__) + rb_gc_mark_locations((VALUE*)((char*)STACK_END + 2), + (VALUE*)((char*)rb_gc_stack_start + 2)); +#endif + rb_gc_mark_threads(); + + /* mark protected global variables */ + for (list = global_List; list; list = list->next) { + rb_gc_mark_maybe(*list->varptr); + } + rb_mark_end_proc(); + rb_gc_mark_global_tbl(); + + rb_mark_tbl(rb_class_tbl); + rb_gc_mark_trap_list(); + + /* mark generic instance variables for special constants */ + rb_mark_generic_ivar_tbl(); + + rb_gc_mark_parser(); + + /* gc_mark objects whose marking are not completed*/ + while (!MARK_STACK_EMPTY){ + if (mark_stack_overflow){ + gc_mark_all(); + } + else { + gc_mark_rest(); + } + } + gc_sweep(); +} + +void +rb_gc() +{ + garbage_collect(); + rb_gc_finalize_deferred(); +} + +/* + * call-seq: + * GC.start => nil + * gc.garbage_collect => nil + * ObjectSpace.garbage_collect => nil + * + * Initiates garbage collection, unless manually disabled. 
+ * + */ + +VALUE +rb_gc_start() +{ + rb_gc(); + return Qnil; +} + +void +ruby_set_stack_size(size) + size_t size; +{ +#ifndef STACK_LEVEL_MAX + STACK_LEVEL_MAX = size/sizeof(VALUE); +#endif +} + +void +Init_stack(addr) + VALUE *addr; +{ +#if defined(_WIN32) || defined(__CYGWIN__) + MEMORY_BASIC_INFORMATION m; + memset(&m, 0, sizeof(m)); + VirtualQuery(&m, &m, sizeof(m)); + rb_gc_stack_start = + STACK_UPPER((VALUE *)&m, (VALUE *)m.BaseAddress, + (VALUE *)((char *)m.BaseAddress + m.RegionSize) - 1); +#elif defined(STACK_END_ADDRESS) + extern void *STACK_END_ADDRESS; + rb_gc_stack_start = STACK_END_ADDRESS; +#else + if (!addr) addr = (VALUE *)&addr; + STACK_UPPER(&addr, addr, ++addr); + if (rb_gc_stack_start) { + if (STACK_UPPER(&addr, + rb_gc_stack_start > addr, + rb_gc_stack_start < addr)) + rb_gc_stack_start = addr; + return; + } + rb_gc_stack_start = addr; +#endif +#ifdef HAVE_GETRLIMIT + { + struct rlimit rlim; + + if (getrlimit(RLIMIT_STACK, &rlim) == 0) { + unsigned int space = rlim.rlim_cur/5; + + if (space > 1024*1024) space = 1024*1024; + STACK_LEVEL_MAX = (rlim.rlim_cur - space) / sizeof(VALUE); + } + } +#if defined(__ia64__) && (!defined(__GNUC__) || __GNUC__ < 2 || defined(__OPTIMIZE__)) + /* ruby crashes on IA64 if compiled with optimizer on */ + /* when if STACK_LEVEL_MAX is greater than this magic number */ + /* I know this is a kludge. I suspect optimizer bug */ +#define IA64_MAGIC_STACK_LIMIT 49152 + if (STACK_LEVEL_MAX > IA64_MAGIC_STACK_LIMIT) + STACK_LEVEL_MAX = IA64_MAGIC_STACK_LIMIT; +#endif +#endif +} + + +/* + * Document-class: ObjectSpace + * + * The <code>ObjectSpace</code> module contains a number of routines + * that interact with the garbage collection facility and allow you to + * traverse all living objects with an iterator. + * + * <code>ObjectSpace</code> also provides support for object + * finalizers, procs that will be called when a specific object is + * about to be destroyed by garbage collection. 
+ * + * include ObjectSpace + * + * + * a = "A" + * b = "B" + * c = "C" + * + * + * define_finalizer(a, proc {|id| puts "Finalizer one on #{id}" }) + * define_finalizer(a, proc {|id| puts "Finalizer two on #{id}" }) + * define_finalizer(b, proc {|id| puts "Finalizer three on #{id}" }) + * + * <em>produces:</em> + * + * Finalizer three on 537763470 + * Finalizer one on 537763480 + * Finalizer two on 537763480 + * + */ + +void +Init_heap() +{ + if (!rb_gc_stack_start) { + Init_stack(0); + } + add_heap(); +} + +static VALUE +os_live_obj() +{ + int i; + int n = 0; + + for (i = 0; i < heaps_used; i++) { + RVALUE *p, *pend; + + p = heaps[i].slot; pend = p + heaps[i].limit; + for (;p < pend; p++) { + if (p->as.basic.flags) { + switch (TYPE(p)) { + case T_ICLASS: + case T_VARMAP: + case T_SCOPE: + case T_NODE: + continue; + case T_CLASS: + if (FL_TEST(p, FL_SINGLETON)) continue; + default: + if (!p->as.basic.klass) continue; + rb_yield((VALUE)p); + n++; + } + } + } + } + + return INT2FIX(n); +} + +static VALUE +os_obj_of(of) + VALUE of; +{ + int i; + int n = 0; + + for (i = 0; i < heaps_used; i++) { + RVALUE *p, *pend; + + p = heaps[i].slot; pend = p + heaps[i].limit; + for (;p < pend; p++) { + if (p->as.basic.flags) { + switch (TYPE(p)) { + case T_ICLASS: + case T_VARMAP: + case T_SCOPE: + case T_NODE: + continue; + case T_CLASS: + if (FL_TEST(p, FL_SINGLETON)) continue; + default: + if (!p->as.basic.klass) continue; + if (rb_obj_is_kind_of((VALUE)p, of)) { + rb_yield((VALUE)p); + n++; + } + } + } + } + } + + return INT2FIX(n); +} + +/* + * call-seq: + * ObjectSpace.each_object([module]) {|obj| ... } => fixnum + * + * Calls the block once for each living, nonimmediate object in this + * Ruby process. If <i>module</i> is specified, calls the block + * for only those classes or modules that match (or are a subclass of) + * <i>module</i>. Returns the number of objects found. 
Immediate + * objects (<code>Fixnum</code>s, <code>Symbol</code>s + * <code>true</code>, <code>false</code>, and <code>nil</code>) are + * never returned. In the example below, <code>each_object</code> + * returns both the numbers we defined and several constants defined in + * the <code>Math</code> module. + * + * a = 102.7 + * b = 95 # Won't be returned + * c = 12345678987654321 + * count = ObjectSpace.each_object(Numeric) {|x| p x } + * puts "Total count: #{count}" + * + * <em>produces:</em> + * + * 12345678987654321 + * 102.7 + * 2.71828182845905 + * 3.14159265358979 + * 2.22044604925031e-16 + * 1.7976931348623157e+308 + * 2.2250738585072e-308 + * Total count: 7 + * + */ + +static VALUE +os_each_obj(argc, argv) + int argc; + VALUE *argv; +{ + VALUE of; + + rb_secure(4); + if (rb_scan_args(argc, argv, "01", &of) == 0) { + return os_live_obj(); + } + else { + return os_obj_of(of); + } +} + +static VALUE finalizers; + +/* deprecated + */ + +static VALUE +add_final(os, block) + VALUE os, block; +{ + rb_warn("ObjectSpace::add_finalizer is deprecated; use define_finalizer"); + if (!rb_respond_to(block, rb_intern("call"))) { + rb_raise(rb_eArgError, "wrong type argument %s (should be callable)", + rb_obj_classname(block)); + } + rb_ary_push(finalizers, block); + return block; +} + +/* + * deprecated + */ +static VALUE +rm_final(os, block) + VALUE os, block; +{ + rb_warn("ObjectSpace::remove_finalizer is deprecated; use undefine_finalizer"); + rb_ary_delete(finalizers, block); + return block; +} + +/* + * deprecated + */ +static VALUE +finals() +{ + rb_warn("ObjectSpace::finalizers is deprecated"); + return finalizers; +} + +/* + * deprecated + */ + +static VALUE +call_final(os, obj) + VALUE os, obj; +{ + rb_warn("ObjectSpace::call_finalizer is deprecated; use define_finalizer"); + need_call_final = 1; + FL_SET(obj, FL_FINALIZE); + return obj; +} + +/* + * call-seq: + * ObjectSpace.undefine_finalizer(obj) + * + * Removes all finalizers for <i>obj</i>. 
+ * + */ + +static VALUE +undefine_final(os, obj) + VALUE os, obj; +{ + if (finalizer_table) { + st_delete(finalizer_table, (st_data_t*)&obj, 0); + } + return obj; +} + +/* + * call-seq: + * ObjectSpace.define_finalizer(obj, aProc=proc()) + * + * Adds <i>aProc</i> as a finalizer, to be called when <i>obj</i> + * is about to be destroyed. + * + */ + +static VALUE +define_final(argc, argv, os) + int argc; + VALUE *argv; + VALUE os; +{ + VALUE obj, block, table; + + rb_scan_args(argc, argv, "11", &obj, &block); + if (argc == 1) { + block = rb_block_proc(); + } + else if (!rb_respond_to(block, rb_intern("call"))) { + rb_raise(rb_eArgError, "wrong type argument %s (should be callable)", + rb_obj_classname(block)); + } + need_call_final = 1; + FL_SET(obj, FL_FINALIZE); + + block = rb_ary_new3(2, INT2FIX(ruby_safe_level), block); + + if (!finalizer_table) { + finalizer_table = st_init_numtable(); + } + if (st_lookup(finalizer_table, obj, &table)) { + rb_ary_push(table, block); + } + else { + st_add_direct(finalizer_table, obj, rb_ary_new3(1, block)); + } + return block; +} + +void +rb_gc_copy_finalizer(dest, obj) + VALUE dest, obj; +{ + VALUE table; + + if (!finalizer_table) return; + if (!FL_TEST(obj, FL_FINALIZE)) return; + if (st_lookup(finalizer_table, obj, &table)) { + st_insert(finalizer_table, dest, table); + } + FL_SET(dest, FL_FINALIZE); +} + +static VALUE +run_single_final(args) + VALUE *args; +{ + rb_eval_cmd(args[0], args[1], (int)args[2]); + return Qnil; +} + +static void +run_final(obj) + VALUE obj; +{ + long i; + int status, critical_save = rb_thread_critical; + VALUE args[3], table; + + rb_thread_critical = Qtrue; + args[1] = rb_ary_new3(1, rb_obj_id(obj)); /* make obj into id */ + args[2] = (VALUE)ruby_safe_level; + for (i=0; i<RARRAY(finalizers)->len; i++) { + args[0] = RARRAY(finalizers)->ptr[i]; + rb_protect((VALUE(*)_((VALUE)))run_single_final, (VALUE)args, &status); + } + if (finalizer_table && st_delete(finalizer_table, (st_data_t*)&obj, &table)) { 
+ for (i=0; i<RARRAY(table)->len; i++) { + VALUE final = RARRAY(table)->ptr[i]; + args[0] = RARRAY(final)->ptr[1]; + args[2] = FIX2INT(RARRAY(final)->ptr[0]); + rb_protect((VALUE(*)_((VALUE)))run_single_final, (VALUE)args, &status); + } + } + rb_thread_critical = critical_save; +} + +void +rb_gc_finalize_deferred() +{ + RVALUE *p = deferred_final_list; + + deferred_final_list = 0; + if (p) { + finalize_list(p); + free_unused_heaps(); + } +} + +void +rb_gc_call_finalizer_at_exit() +{ + RVALUE *p, *pend; + int i; + + /* run finalizers */ + if (need_call_final) { + finalize_list(deferred_final_list); + for (i = 0; i < heaps_used; i++) { + p = heaps[i].slot; pend = p + heaps[i].limit; + while (p < pend) { + if (FL_TEST(p, FL_FINALIZE)) { + FL_UNSET(p, FL_FINALIZE); + p->as.basic.klass = 0; + run_final((VALUE)p); + } + p++; + } + } + } + /* run data object's finalizers */ + for (i = 0; i < heaps_used; i++) { + p = heaps[i].slot; pend = p + heaps[i].limit; + while (p < pend) { + if (BUILTIN_TYPE(p) == T_DATA && + DATA_PTR(p) && RANY(p)->as.data.dfree && + RANY(p)->as.basic.klass != rb_cThread) { + p->as.free.flags = 0; + if ((long)RANY(p)->as.data.dfree == -1) { + RUBY_CRITICAL(free(DATA_PTR(p))); + } + else if (RANY(p)->as.data.dfree) { + (*RANY(p)->as.data.dfree)(DATA_PTR(p)); + } + } + else if (BUILTIN_TYPE(p) == T_FILE) { + if (rb_io_fptr_finalize(RANY(p)->as.file.fptr)) { + p->as.free.flags = 0; + } + } + p++; + } + } +} + +/* + * call-seq: + * ObjectSpace._id2ref(object_id) -> an_object + * + * Converts an object id to a reference to the object. May not be + * called on an object id passed as a parameter to a finalizer. 
+ *
+ *     s = "I am a string"                        #=> "I am a string"
+ *     r = ObjectSpace._id2ref(s.object_id)       #=> "I am a string"
+ *     r == s                                     #=> true
+ *
+ *  Raises RangeError when the id does not point into the heap, names an
+ *  internal type, or refers to a recycled slot.
+ */
+
+static VALUE
+id2ref(obj, id)
+    VALUE obj, id;
+{
+    unsigned long ptr, p0;
+
+    rb_secure(4);
+    /* p0 keeps the numeric id for error messages */
+    p0 = ptr = NUM2ULONG(id);
+    /* ids that are themselves immediate values map straight back */
+    if (ptr == Qtrue) return Qtrue;
+    if (ptr == Qfalse) return Qfalse;
+    if (ptr == Qnil) return Qnil;
+    if (FIXNUM_P(ptr)) return (VALUE)ptr;
+    if (SYMBOL_P(ptr) && rb_id2name(SYM2ID((VALUE)ptr)) != 0) {
+        return (VALUE)ptr;
+    }
+
+    ptr = id ^ FIXNUM_FLAG;     /* unset FIXNUM_FLAG */
+    if (!is_pointer_to_heap((void *)ptr)|| BUILTIN_TYPE(ptr) >= T_BLOCK) {
+        rb_raise(rb_eRangeError, "0x%lx is not id value", p0);
+    }
+    if (BUILTIN_TYPE(ptr) == 0 || RBASIC(ptr)->klass == 0) {
+        rb_raise(rb_eRangeError, "0x%lx is recycled object", p0);
+    }
+    return (VALUE)ptr;
+}
+
+/*
+ *  The <code>GC</code> module provides an interface to Ruby's mark and
+ *  sweep garbage collection mechanism.  Some of the underlying methods
+ *  are also available via the <code>ObjectSpace</code> module.
+ */
+
+/*
+ * Defines the GC and ObjectSpace modules with their methods and sets up
+ * the file-level globals finalizers, source_filenames and nomem_error.
+ */
+void
+Init_GC()
+{
+    VALUE rb_mObSpace;
+
+    rb_mGC = rb_define_module("GC");
+    rb_define_singleton_method(rb_mGC, "start", rb_gc_start, 0);
+    rb_define_singleton_method(rb_mGC, "enable", rb_gc_enable, 0);
+    rb_define_singleton_method(rb_mGC, "disable", rb_gc_disable, 0);
+    rb_define_method(rb_mGC, "garbage_collect", rb_gc_start, 0);
+
+    rb_mObSpace = rb_define_module("ObjectSpace");
+    rb_define_module_function(rb_mObSpace, "each_object", os_each_obj, -1);
+    rb_define_module_function(rb_mObSpace, "garbage_collect", rb_gc_start, 0);
+    rb_define_module_function(rb_mObSpace, "add_finalizer", add_final, 1);
+    rb_define_module_function(rb_mObSpace, "remove_finalizer", rm_final, 1);
+    rb_define_module_function(rb_mObSpace, "finalizers", finals, 0);
+    rb_define_module_function(rb_mObSpace, "call_finalizer", call_final, 1);
+
+    rb_define_module_function(rb_mObSpace, "define_finalizer", define_final, -1);
+    rb_define_module_function(rb_mObSpace, "undefine_finalizer", undefine_final, 1);
+
+    rb_define_module_function(rb_mObSpace, "_id2ref", id2ref, 1);
+
+    /* rb_mObSpace is a local: its address is registered only around the
+     * rb_global_variable call and unregistered before returning */
+    rb_gc_register_address(&rb_mObSpace);
+    rb_global_variable(&finalizers);
+    rb_gc_unregister_address(&rb_mObSpace);
+    finalizers = rb_ary_new();
+
+    source_filenames = st_init_strtable();
+
+    /* the NoMemoryError instance is created once, up front */
+    nomem_error = rb_exc_new2(rb_eNoMemError, "failed to allocate memory");
+    rb_global_variable(&nomem_error);
+}
+/**********************************************************************
+
+  hash.c -
+
+  $Author: ocean $
+  $Date: 2005/05/08 12:23:51 $
+  created at: Mon Nov 22 18:51:18 JST 1993
+
+  Copyright (C) 1993-2003 Yukihiro Matsumoto
+  Copyright (C) 2000  Network Applied Communication Laboratory, Inc.
+  Copyright (C) 2000  Information-technology Promotion Agency, Japan
+
+**********************************************************************/
+
+#include "ruby.h"
+#include "st.h"
+#include "util.h"
+#include "rubysig.h"
+
+#ifdef __APPLE__
+#include <crt_externs.h>
+#endif
+
+/* set while entries deleted during iteration await st_cleanup_safe */
+#define HASH_DELETED  FL_USER1
+/* set when ifnone holds a proc to call rather than a plain default value */
+#define HASH_PROC_DEFAULT FL_USER2
+
+/* Freezes hash via rb_obj_freeze and returns the result. */
+VALUE
+rb_hash_freeze(hash)
+    VALUE hash;
+{
+    return rb_obj_freeze(hash);
+}
+
+VALUE rb_cHash;
+
+static VALUE envtbl;
+static ID id_hash, id_call, id_default;
+
+/* rb_with_disable_interrupt callback: rb_eql on the pair args[0], args[1]. */
+static VALUE
+eql(args)
+    VALUE *args;
+{
+    return (VALUE)rb_eql(args[0], args[1]);
+}
+
+/*
+ * Key comparison for objhash.  Returns 0 when a and b are the same key,
+ * non-zero otherwise.  Fixnums and symbols compare by identity, plain
+ * Strings (class exactly rb_cString) via rb_str_cmp, everything else
+ * via rb_eql.
+ */
+static int
+rb_any_cmp(a, b)
+    VALUE a, b;
+{
+    VALUE args[2];
+
+    if (a == b) return 0;
+    if (FIXNUM_P(a) && FIXNUM_P(b)) {
+        return a != b;
+    }
+    if (TYPE(a) == T_STRING && RBASIC(a)->klass == rb_cString &&
+        TYPE(b) == T_STRING && RBASIC(b)->klass == rb_cString) {
+        return rb_str_cmp(a, b);
+    }
+    /* Qundef marks a deleted slot and never matches another key */
+    if (a == Qundef || b == Qundef) return -1;
+    if (SYMBOL_P(a) && SYMBOL_P(b)) {
+        return a != b;
+    }
+
+    args[0] = a;
+    args[1] = b;
+    return !rb_with_disable_interrupt(eql, (VALUE)args);
+}
+
+/* Calls obj's `hash' method (id_hash) and returns the result. */
+VALUE
+rb_hash(obj)
+    VALUE obj;
+{
+    return rb_funcall(obj, id_hash, 0);
+}
+
+/*
+ * Hash function for objhash.  Fixnums and symbols hash to their own
+ * VALUE bits, strings via rb_str_hash, everything else via the object's
+ * `hash' method; a non-Fixnum result is reduced with % 536870923.
+ */
+static int
+rb_any_hash(a)
+    VALUE a;
+{
+    VALUE hval;
+
+    switch (TYPE(a)) {
+      case T_FIXNUM:
+      case T_SYMBOL:
+        return (int)a;
+        break;
+
+      case T_STRING:
+        return rb_str_hash(a);
+        break;
+
+      default:
+        hval = rb_funcall(a, id_hash, 0);
+        if (!FIXNUM_P(hval)) {
+            hval = rb_funcall(hval, '%', 1, INT2FIX(536870923));
+        }
+        return (int)FIX2LONG(hval);
+    }
+}
+
+/* compare/hash callbacks used for every Hash table created in this file */
+static struct st_hash_type objhash = {
+    rb_any_cmp,
+    rb_any_hash,
+};
+
+/* context handed to foreach_safe_i by st_foreach_safe */
+struct foreach_safe_arg {
+    st_table *tbl;
+    int (*func)();
+    st_data_t arg;
+};
+
+/*
+ * st_foreach callback: skips Qundef keys, calls the user function and
+ * turns its ST_CONTINUE into ST_CHECK; other statuses pass through.
+ */
+static int
+foreach_safe_i(key, value, arg)
+    st_data_t key, value;
+    struct foreach_safe_arg *arg;
+{
+    int status;
+
+    if (key == Qundef) return ST_CONTINUE;
+    status = (*arg->func)(key, value, arg->arg);
+    if (status == ST_CONTINUE) {
+        return ST_CHECK;
+    }
+    return status;
+}
+
+/*
+ * Iterates table with func(key, value, a) through foreach_safe_i.
+ * Raises RuntimeError ("hash modified during iteration") when
+ * st_foreach returns non-zero.
+ */
+void
+st_foreach_safe(table, func, a)
+    st_table *table;
+    int (*func)();
+    st_data_t a;
+{
+    struct foreach_safe_arg arg;
+
+    arg.tbl = table;
+    arg.func = func;
+    arg.arg = a;
+    if (st_foreach(table, foreach_safe_i, (st_data_t)&arg)) {
+        rb_raise(rb_eRuntimeError, "hash modified during iteration");
+    }
+}
+
+/* context handed to hash_foreach_iter by rb_hash_foreach */
+struct hash_foreach_arg {
+    VALUE hash;
+    int (*func)();
+    VALUE arg;
+};
+
+/*
+ * st_foreach callback behind rb_hash_foreach.  Skips Qundef keys, calls
+ * the user function and raises RuntimeError if the hash's table pointer
+ * changed during the call.  ST_DELETE is carried out with
+ * st_delete_safe and recorded via HASH_DELETED.
+ */
+static int
+hash_foreach_iter(key, value, arg)
+    VALUE key, value;
+    struct hash_foreach_arg *arg;
+{
+    int status;
+    st_table *tbl;
+
+    tbl = RHASH(arg->hash)->tbl;
+    if (key == Qundef) return ST_CONTINUE;
+    status = (*arg->func)(key, value, arg->arg);
+    if (RHASH(arg->hash)->tbl != tbl) {
+        rb_raise(rb_eRuntimeError, "rehash occurred during iteration");
+    }
+    switch (status) {
+      case ST_DELETE:
+        st_delete_safe(tbl, (st_data_t*)&key, 0, Qundef);
+        FL_SET(arg->hash, HASH_DELETED);
+        /* no break: falls through to ST_CONTINUE */
+      case ST_CONTINUE:
+        break;
+      case ST_STOP:
+        return ST_STOP;
+    }
+    return ST_CHECK;
+}
+
+/*
+ * rb_ensure cleanup for rb_hash_foreach: decrements iter_lev and, once
+ * it reaches 0, runs st_cleanup_safe if HASH_DELETED is set.
+ */
+static VALUE
+hash_foreach_ensure(hash)
+    VALUE hash;
+{
+    RHASH(hash)->iter_lev--;
+
+    if (RHASH(hash)->iter_lev == 0) {
+        if (FL_TEST(hash, HASH_DELETED)) {
+            st_cleanup_safe(RHASH(hash)->tbl, Qundef);
+            FL_UNSET(hash, HASH_DELETED);
+        }
+    }
+    return 0;
+}
+
+/*
+ * rb_ensure body for rb_hash_foreach.  Raises RuntimeError when
+ * st_foreach returns non-zero.
+ */
+static VALUE
+hash_foreach_call(arg)
+    struct hash_foreach_arg *arg;
+{
+    if (st_foreach(RHASH(arg->hash)->tbl, hash_foreach_iter, (st_data_t)arg)) {
+        rb_raise(rb_eRuntimeError, "hash modified during iteration");
+    }
+    return Qnil;
+}
+
+/*
+ * Calls func(key, value, farg) for each entry of hash.  iter_lev is
+ * incremented for the duration; hash_foreach_ensure decrements it again
+ * through rb_ensure.
+ */
+void
+rb_hash_foreach(hash, func, farg)
+    VALUE hash;
+    int (*func)();
+    VALUE farg;
+{
+    struct hash_foreach_arg arg;
+
+    RHASH(hash)->iter_lev++;
+    arg.hash = hash;
+    arg.func = func;
+    arg.arg  = farg;
+    rb_ensure(hash_foreach_call, (VALUE)&arg, hash_foreach_ensure, hash);
+}
+
+static VALUE hash_alloc _((VALUE));
+
+/*
+ * Allocates a T_HASH object of class klass with ifnone = Qnil and a
+ * fresh, empty table built on objhash.
+ */
+static VALUE
+hash_alloc(klass)
+    VALUE klass;
+{
+    NEWOBJ(hash, struct RHash);
+    OBJSETUP(hash, klass, T_HASH);
+
+    hash->ifnone = Qnil;
+    hash->tbl = st_init_table(&objhash);
+
+    return (VALUE)hash;
+}
+
+/* Returns a new, empty Hash of class rb_cHash. */
+VALUE
+rb_hash_new()
+{
+    return hash_alloc(rb_cHash);
+}
+
+/*
+ * Guard called before any mutation.  Raises TypeError for a hash with no
+ * table, calls rb_error_frozen for a frozen hash, and raises
+ * SecurityError for an untainted hash at safe level 4 or above.
+ */
+static void
+rb_hash_modify(hash)
+    VALUE hash;
+{
+    if (!RHASH(hash)->tbl) rb_raise(rb_eTypeError, "uninitialized Hash");
+    if (OBJ_FROZEN(hash)) rb_error_frozen("hash");
+    if (!OBJ_TAINTED(hash) && rb_safe_level() >= 4)
+        rb_raise(rb_eSecurityError, "Insecure: can't modify hash");
+}
+
+/*
+ *  call-seq:
+ *     Hash.new                          => hash
+ *     Hash.new(obj)                     => aHash
+ *     Hash.new {|hash, key| block }     => aHash
+ *
+ *  Returns a new, empty hash. If this hash is subsequently accessed by
+ *  a key that doesn't correspond to a hash entry, the value returned
+ *  depends on the style of <code>new</code> used to create the hash. In
+ *  the first form, the access returns <code>nil</code>. If
+ *  <i>obj</i> is specified, this single object will be used for
+ *  all <em>default values</em>. If a block is specified, it will be
+ *  called with the hash object and the key, and should return the
+ *  default value. It is the block's responsibility to store the value
+ *  in the hash if required.
+ *
+ *     h = Hash.new("Go Fish")
+ *     h["a"] = 100
+ *     h["b"] = 200
+ *     h["a"]           #=> 100
+ *     h["c"]           #=> "Go Fish"
+ *     # The following alters the single default object
+ *     h["c"].upcase!   #=> "GO FISH"
+ *     h["d"]           #=> "GO FISH"
+ *     h.keys           #=> ["a", "b"]
+ *
+ *     # While this creates a new default object each time
+ *     h = Hash.new { |hash, key| hash[key] = "Go Fish: #{key}" }
+ *     h["c"]           #=> "Go Fish: c"
+ *     h["c"].upcase!
#=> "GO FISH: C" + * h["d"] #=> "Go Fish: d" + * h.keys #=> ["c", "d"] + * + */ + +static VALUE +rb_hash_initialize(argc, argv, hash) + int argc; + VALUE *argv; + VALUE hash; +{ + VALUE ifnone; + + rb_hash_modify(hash); + if (rb_block_given_p()) { + if (argc > 0) { + rb_raise(rb_eArgError, "wrong number of arguments"); + } + RHASH(hash)->ifnone = rb_block_proc(); + FL_SET(hash, HASH_PROC_DEFAULT); + } + else { + rb_scan_args(argc, argv, "01", &ifnone); + RHASH(hash)->ifnone = ifnone; + } + + return hash; +} + +/* + * call-seq: + * Hash[ [key =>|, value]* ] => hash + * + * Creates a new hash populated with the given objects. Equivalent to + * the literal <code>{ <i>key</i>, <i>value</i>, ... }</code>. Keys and + * values occur in pairs, so there must be an even number of arguments. + * + * Hash["a", 100, "b", 200] #=> {"a"=>100, "b"=>200} + * Hash["a" => 100, "b" => 200] #=> {"a"=>100, "b"=>200} + * { "a" => 100, "b" => 200 } #=> {"a"=>100, "b"=>200} + */ + +static VALUE +rb_hash_s_create(argc, argv, klass) + int argc; + VALUE *argv; + VALUE klass; +{ + VALUE hash; + int i; + + if (argc == 1 && TYPE(argv[0]) == T_HASH) { + hash = hash_alloc(klass); + + RHASH(hash)->ifnone = Qnil; + RHASH(hash)->tbl = st_copy(RHASH(argv[0])->tbl); + + return hash; + } + + if (argc % 2 != 0) { + rb_raise(rb_eArgError, "odd number of arguments for Hash"); + } + + hash = hash_alloc(klass); + for (i=0; i<argc; i+=2) { + rb_hash_aset(hash, argv[i], argv[i + 1]); + } + + return hash; +} + +static VALUE +to_hash(hash) + VALUE hash; +{ + return rb_convert_type(hash, T_HASH, "Hash", "to_hash"); +} + +static int +rb_hash_rehash_i(key, value, tbl) + VALUE key, value; + st_table *tbl; +{ + if (key != Qundef) st_insert(tbl, key, value); + return ST_CONTINUE; +} + +/* + * call-seq: + * hsh.rehash -> hsh + * + * Rebuilds the hash based on the current hash values for each key. If + * values of key objects have changed since they were inserted, this + * method will reindex <i>hsh</i>. 
If <code>Hash#rehash</code> is
+ *  called while an iterator is traversing the hash, a
+ *  <code>RuntimeError</code> will be raised in the iterator.
+ *
+ *     a = [ "a", "b" ]
+ *     c = [ "c", "d" ]
+ *     h = { a => 100, c => 300 }
+ *     h[a]       #=> 100
+ *     a[0] = "z"
+ *     h[a]       #=> nil
+ *     h.rehash   #=> {["z", "b"]=>100, ["c", "d"]=>300}
+ *     h[a]       #=> 100
+ */
+
+static VALUE
+rb_hash_rehash(hash)
+    VALUE hash;
+{
+    st_table *tbl;
+
+    if (RHASH(hash)->iter_lev > 0) {
+        rb_raise(rb_eRuntimeError, "rehash during iteration");
+    }
+    rb_hash_modify(hash);
+    /* build a replacement table sized for the current entries, reinsert
+     * everything, then swap it in and free the old table */
+    tbl = st_init_table_with_size(&objhash, RHASH(hash)->tbl->num_entries);
+    rb_hash_foreach(hash, rb_hash_rehash_i, (st_data_t)tbl);
+    st_free_table(RHASH(hash)->tbl);
+    RHASH(hash)->tbl = tbl;
+
+    return hash;
+}
+
+/*
+ *  call-seq:
+ *     hsh[key]    =>  value
+ *
+ *  Element Reference---Retrieves the <i>value</i> object corresponding
+ *  to the <i>key</i> object. If not found, returns the default value (see
+ *  <code>Hash::new</code> for details).
+ *
+ *     h = { "a" => 100, "b" => 200 }
+ *     h["a"]   #=> 100
+ *     h["c"]   #=> nil
+ *
+ */
+
+VALUE
+rb_hash_aref(hash, key)
+    VALUE hash, key;
+{
+    VALUE val;
+
+    if (!st_lookup(RHASH(hash)->tbl, key, &val)) {
+        /* miss: dispatch to the `default' method (id_default) with the key */
+        return rb_funcall(hash, id_default, 1, key);
+    }
+    return val;
+}
+
+/*
+ *  call-seq:
+ *     hsh.fetch(key [, default] )       => obj
+ *     hsh.fetch(key) {| key | block }   => obj
+ *
+ *  Returns a value from the hash for the given key. If the key can't be
+ *  found, there are several options: With no other arguments, it will
+ *  raise a <code>KeyError</code> exception; if <i>default</i> is
+ *  given, then that will be returned; if the optional code block is
+ *  specified, then that will be run and its result returned.
+ *
+ *     h = { "a" => 100, "b" => 200 }
+ *     h.fetch("a")                            #=> 100
+ *     h.fetch("z", "go fish")                 #=> "go fish"
+ *     h.fetch("z") { |el| "go fish, #{el}"}   #=> "go fish, z"
+ *
+ *  The following example shows that an exception is raised if the key
+ *  is not found and a default value is not supplied.
+ *
+ *     h = { "a" => 100, "b" => 200 }
+ *     h.fetch("z")
+ *
+ *  <em>produces:</em>
+ *
+ *     prog.rb:2:in `fetch': key not found (KeyError)
+ *      from prog.rb:2
+ *
+ */
+
+static VALUE
+rb_hash_fetch(argc, argv, hash)
+    int argc;
+    VALUE *argv;
+    VALUE hash;
+{
+    VALUE key, if_none;
+    VALUE val;
+    long block_given;
+
+    rb_scan_args(argc, argv, "11", &key, &if_none);
+
+    block_given = rb_block_given_p();
+    if (block_given && argc == 2) {
+        rb_warn("block supersedes default value argument");
+    }
+    if (!st_lookup(RHASH(hash)->tbl, key, &val)) {
+        /* on a miss: block first, then the default argument, else KeyError */
+        if (block_given) return rb_yield(key);
+        if (argc == 1) {
+            rb_raise(rb_eKeyError, "key not found");
+        }
+        return if_none;
+    }
+    return val;
+}
+
+/*
+ *  call-seq:
+ *     hsh.default(key=nil)   => obj
+ *
+ *  Returns the default value, the value that would be returned by
+ *  <i>hsh</i>[<i>key</i>] if <i>key</i> did not exist in <i>hsh</i>.
+ *  See also <code>Hash::new</code> and <code>Hash#default=</code>.
+ *
+ *     h = Hash.new                            #=> {}
+ *     h.default                               #=> nil
+ *     h.default(2)                            #=> nil
+ *
+ *     h = Hash.new("cat")                     #=> {}
+ *     h.default                               #=> "cat"
+ *     h.default(2)                            #=> "cat"
+ *
+ *     h = Hash.new {|h,k| h[k] = k.to_i*10}   #=> {}
+ *     h.default                               #=> 0
+ *     h.default(2)                            #=> 20
+ */
+
+static VALUE
+rb_hash_default(argc, argv, hash)
+    int argc;
+    VALUE *argv;
+    VALUE hash;
+{
+    VALUE key;
+
+    rb_scan_args(argc, argv, "01", &key);
+    if (FL_TEST(hash, HASH_PROC_DEFAULT)) {
+        /* ifnone is a proc: call it with (hash, key) */
+        return rb_funcall(RHASH(hash)->ifnone, id_call, 2, hash, key);
+    }
+    return RHASH(hash)->ifnone;
+}
+
+/*
+ *  call-seq:
+ *     hsh.default = obj     => hsh
+ *
+ *  Sets the default value, the value returned for a key that does not
+ *  exist in the hash.
It is not possible to set the default to a
+ *  <code>Proc</code> that will be executed on each key lookup.
+ *
+ *     h = { "a" => 100, "b" => 200 }
+ *     h.default = "Go fish"
+ *     h["a"]     #=> 100
+ *     h["z"]     #=> "Go fish"
+ *     # This doesn't do what you might hope...
+ *     h.default = proc do |hash, key|
+ *       hash[key] = key + key
+ *     end
+ *     h[2]       #=> #<Proc:0x401b3948@-:6>
+ *     h["cat"]   #=> #<Proc:0x401b3948@-:6>
+ */
+
+static VALUE
+rb_hash_set_default(hash, ifnone)
+    VALUE hash, ifnone;
+{
+    rb_hash_modify(hash);
+    RHASH(hash)->ifnone = ifnone;
+    /* clearing the flag means ifnone is returned as-is, never called */
+    FL_UNSET(hash, HASH_PROC_DEFAULT);
+    return ifnone;
+}
+
+/*
+ *  call-seq:
+ *     hsh.default_proc -> anObject
+ *
+ *  If <code>Hash::new</code> was invoked with a block, return that
+ *  block, otherwise return <code>nil</code>.
+ *
+ *     h = Hash.new {|h,k| h[k] = k*k }   #=> {}
+ *     p = h.default_proc                 #=> #<Proc:0x401b3d08@-:1>
+ *     a = []                             #=> []
+ *     p.call(a, 2)
+ *     a                                  #=> [nil, nil, 4]
+ */
+
+
+static VALUE
+rb_hash_default_proc(hash)
+    VALUE hash;
+{
+    if (FL_TEST(hash, HASH_PROC_DEFAULT)) {
+        return RHASH(hash)->ifnone;
+    }
+    return Qnil;
+}
+
+/*
+ * rb_hash_foreach callback for rb_hash_key: args[0] is the value looked
+ * for; on the first rb_equal match the key is stored in args[1] and
+ * iteration stops.
+ */
+static int
+key_i(key, value, args)
+    VALUE key, value;
+    VALUE *args;
+{
+    if (rb_equal(value, args[0])) {
+        args[1] = key;
+        return ST_STOP;
+    }
+    return ST_CONTINUE;
+}
+
+/*
+ *  call-seq:
+ *     hsh.key(value)    => key
+ *
+ *  Returns the key for a given value. If not found, returns <code>nil</code>.
+ *
+ *     h = { "a" => 100, "b" => 200 }
+ *     h.key(200)   #=> "b"
+ *     h.key(999)   #=> nil
+ *
+ */
+
+static VALUE
+rb_hash_key(hash, value)
+    VALUE hash, value;
+{
+    VALUE args[2];
+
+    args[0] = value;    /* value to search for */
+    args[1] = Qnil;     /* result slot, filled in by key_i on a match */
+
+    rb_hash_foreach(hash, key_i, (st_data_t)args);
+
+    return args[1];
+}
+
+/* :nodoc: */
+/* Deprecated alias: emits a warning, then delegates to rb_hash_key. */
+static VALUE
+rb_hash_index(hash, value)
+    VALUE hash, value;
+{
+    rb_warn("Hash#index is deprecated; use Hash#key");
+    return rb_hash_key(hash, value);
+}
+
+/*
+ *  call-seq:
+ *     hsh.delete(key)                   => value
+ *     hsh.delete(key) {| key | block }  => value
+ *
+ *  Deletes the key-value pair from <i>hsh</i> whose key is equal to
+ *  <i>key</i> and returns its value. If the key is not found, returns
+ *  <code>nil</code>. If the optional code block is given and the
+ *  key is not found, pass in the key and return the result of
+ *  <i>block</i>.
+ *
+ *     h = { "a" => 100, "b" => 200 }
+ *     h.delete("a")                              #=> 100
+ *     h.delete("z")                              #=> nil
+ *     h.delete("z") { |el| "#{el} not found" }   #=> "z not found"
+ *
+ */
+
+VALUE
+rb_hash_delete(hash, key)
+    VALUE hash, key;
+{
+    VALUE val;
+
+    rb_hash_modify(hash);
+    if (RHASH(hash)->iter_lev > 0) {
+        /* during iteration use st_delete_safe and set HASH_DELETED so
+         * hash_foreach_ensure cleans up afterwards */
+        if (st_delete_safe(RHASH(hash)->tbl, (st_data_t*)&key, &val, Qundef)) {
+            FL_SET(hash, HASH_DELETED);
+            return val;
+        }
+    }
+    else if (st_delete(RHASH(hash)->tbl, (st_data_t*)&key, &val))
+        return val;
+    if (rb_block_given_p()) {
+        return rb_yield(key);
+    }
+    return Qnil;
+}
+
+/* state shared between rb_hash_shift and shift_i */
+struct shift_var {
+    int stop;
+    VALUE key;
+    VALUE val;
+};
+
+/*
+ * rb_hash_foreach callback for rb_hash_shift: records the first live
+ * entry in var and requests its deletion; stops at the next entry.
+ */
+static int
+shift_i(key, value, var)
+    VALUE key, value;
+    struct shift_var *var;
+{
+    if (key == Qundef) return ST_CONTINUE;
+    if (var->stop) return ST_STOP;
+    var->stop = 1;
+    var->key = key;
+    var->val = value;
+    return ST_DELETE;
+}
+
+/*
+ *  call-seq:
+ *     hsh.shift -> anArray or obj
+ *
+ *  Removes a key-value pair from <i>hsh</i> and returns it as the
+ *  two-item array <code>[</code> <i>key, value</i> <code>]</code>, or
+ *  the hash's default value if the hash is empty.
+ *
+ *     h = { 1 => "a", 2 => "b", 3 => "c" }
+ *     h.shift   #=> [1, "a"]
+ *     h         #=> {2=>"b", 3=>"c"}
+ */
+
+static VALUE
+rb_hash_shift(hash)
+    VALUE hash;
+{
+    struct shift_var var;
+
+    rb_hash_modify(hash);
+    var.stop = 0;
+    rb_hash_foreach(hash, shift_i, (st_data_t)&var);
+
+    if (var.stop) {
+        /* shift_i found and removed an entry */
+        return rb_assoc_new(var.key, var.val);
+    }
+    else if (FL_TEST(hash, HASH_PROC_DEFAULT)) {
+        /* empty hash with a default proc: call it with a nil key */
+        return rb_funcall(RHASH(hash)->ifnone, id_call, 2, hash, Qnil);
+    }
+    else {
+        return RHASH(hash)->ifnone;
+    }
+}
+
+/*
+ * rb_hash_foreach callback for rb_hash_delete_if: yields (key, value)
+ * and deletes the entry when the block result is true.
+ */
+static int
+delete_if_i(key, value, hash)
+    VALUE key, value, hash;
+{
+    if (key == Qundef) return ST_CONTINUE;
+    if (RTEST(rb_yield_values(2, key, value))) {
+        rb_hash_delete(hash, key);
+    }
+    return ST_CONTINUE;
+}
+
+/*
+ *  call-seq:
+ *     hsh.delete_if {| key, value | block }  -> hsh
+ *
+ *  Deletes every key-value pair from <i>hsh</i> for which <i>block</i>
+ *  evaluates to <code>true</code>.
+ *
+ *     h = { "a" => 100, "b" => 200, "c" => 300 }
+ *     h.delete_if {|key, value| key >= "b" }   #=> {"a"=>100}
+ *
+ */
+
+VALUE
+rb_hash_delete_if(hash)
+    VALUE hash;
+{
+    rb_hash_modify(hash);
+    rb_hash_foreach(hash, delete_if_i, hash);
+    return hash;
+}
+
+/*
+ *  call-seq:
+ *     hsh.reject! {| key, value | block }  -> hsh or nil
+ *
+ *  Equivalent to <code>Hash#delete_if</code>, but returns
+ *  <code>nil</code> if no changes were made.
+ */
+
+VALUE
+rb_hash_reject_bang(hash)
+    VALUE hash;
+{
+    /* "no changes" is detected by comparing entry counts before and after */
+    int n = RHASH(hash)->tbl->num_entries;
+    rb_hash_delete_if(hash);
+    if (n == RHASH(hash)->tbl->num_entries) return Qnil;
+    return hash;
+}
+
+/*
+ *  call-seq:
+ *     hsh.reject {| key, value | block }  -> a_hash
+ *
+ *  Same as <code>Hash#delete_if</code>, but works on (and returns) a
+ *  copy of the <i>hsh</i>. Equivalent to
+ *  <code><i>hsh</i>.dup.delete_if</code>.
+ *
+ */
+
+static VALUE
+rb_hash_reject(hash)
+    VALUE hash;
+{
+    return rb_hash_delete_if(rb_obj_dup(hash));
+}
+
+/*
+ * rb_hash_foreach callback for rb_hash_select: pushes [key, value] onto
+ * result when the block result for (key, value) is true.
+ */
+static int
+select_i(key, value, result)
+    VALUE key, value, result;
+{
+    if (key == Qundef) return ST_CONTINUE;
+    if (RTEST(rb_yield_values(2, key, value)))
+        rb_ary_push(result, rb_assoc_new(key, value));
+    return ST_CONTINUE;
+}
+
+/*
+ * call-seq:
+ *   hsh.values_at(key, ...)   => array
+ *
+ * Return an array containing the values associated with the given keys.
+ * Also see <code>Hash.select</code>.
+ *
+ *   h = { "cat" => "feline", "dog" => "canine", "cow" => "bovine" }
+ *   h.values_at("cow", "cat")  #=> ["bovine", "feline"]
+*/
+
+VALUE
+rb_hash_values_at(argc, argv, hash)
+    int argc;
+    VALUE *argv;
+    VALUE hash;
+{
+    VALUE result = rb_ary_new2(argc);
+    long i;
+
+    /* each lookup goes through rb_hash_aref, so missing keys yield the
+     * hash's default */
+    for (i=0; i<argc; i++) {
+        rb_ary_push(result, rb_hash_aref(hash, argv[i]));
+    }
+    return result;
+}
+
+/*
+ *  call-seq:
+ *     hsh.select {|key, value| block}   => array
+ *
+ *  Returns a new array consisting of <code>[key,value]</code>
+ *  pairs for which the block returns true.
+ *  Also see <code>Hash.values_at</code>.
+ *
+ *     h = { "a" => 100, "b" => 200, "c" => 300 }
+ *     h.select {|k,v| k > "a"}  #=> [["b", 200], ["c", 300]]
+ *     h.select {|k,v| v < 200}  #=> [["a", 100]]
+ */
+
+VALUE
+rb_hash_select(hash)
+    VALUE hash;
+{
+    VALUE result;
+
+    result = rb_ary_new();
+    rb_hash_foreach(hash, select_i, result);
+    return result;
+}
+
+/* rb_hash_foreach callback for rb_hash_clear: deletes every entry. */
+static int
+clear_i(key, value, dummy)
+    VALUE key, value, dummy;
+{
+    return ST_DELETE;
+}
+
+/*
+ *  call-seq:
+ *     hsh.clear -> hsh
+ *
+ *  Removes all key-value pairs from <i>hsh</i>.
+ *
+ *     h = { "a" => 100, "b" => 200 }   #=> {"a"=>100, "b"=>200}
+ *     h.clear                          #=> {}
+ *
+ */
+
+static VALUE
+rb_hash_clear(hash)
+    VALUE hash;
+{
+    rb_hash_modify(hash);
+    /* skip the iteration entirely for an already-empty table */
+    if (RHASH(hash)->tbl->num_entries > 0) {
+        rb_hash_foreach(hash, clear_i, 0);
+    }
+
+    return hash;
+}
+
+/*
+ *  call-seq:
+ *     hsh[key] = value        => value
+ *     hsh.store(key, value)   => value
+ *
+ *  Element Assignment---Associates the value given by
+ *  <i>value</i> with the key given by <i>key</i>.
+ *  <i>key</i> should not have its value changed while it is in
+ *  use as a key (a <code>String</code> passed as a key will be
+ *  duplicated and frozen).
+ *
+ *     h = { "a" => 100, "b" => 200 }
+ *     h["a"] = 9
+ *     h["c"] = 4
+ *     h   #=> {"a"=>9, "b"=>200, "c"=>4}
+ *
+ */
+
+VALUE
+rb_hash_aset(hash, key, val)
+    VALUE hash, key, val;
+{
+    rb_hash_modify(hash);
+    if (TYPE(key) != T_STRING || st_lookup(RHASH(hash)->tbl, key, 0)) {
+        st_insert(RHASH(hash)->tbl, key, val);
+    }
+    else {
+        /* new String key: store rb_str_new4(key) rather than key itself */
+        st_add_direct(RHASH(hash)->tbl, rb_str_new4(key), val);
+    }
+    return val;
+}
+
+/*
+ * rb_hash_foreach callback for rb_hash_replace: stores each live entry
+ * into hash via rb_hash_aset.
+ */
+static int
+replace_i(key, val, hash)
+    VALUE key, val, hash;
+{
+    if (key != Qundef) {
+        rb_hash_aset(hash, key, val);
+    }
+
+    return ST_CONTINUE;
+}
+
+/*
+ *  call-seq:
+ *     hsh.replace(other_hash) -> hsh
+ *
+ *  Replaces the contents of <i>hsh</i> with the contents of
+ *  <i>other_hash</i>.
+ *
+ *     h = { "a" => 100, "b" => 200 }
+ *     h.replace({ "c" => 300, "d" => 400 })   #=> {"c"=>300, "d"=>400}
+ *
+ */
+
+static VALUE
+rb_hash_replace(hash, hash2)
+    VALUE hash, hash2;
+{
+    hash2 = to_hash(hash2);
+    /* replacing a hash with itself is a no-op */
+    if (hash == hash2) return hash;
+    rb_hash_clear(hash);
+    rb_hash_foreach(hash2, replace_i, hash);
+    /* the default (value or proc) is copied along with the entries */
+    RHASH(hash)->ifnone = RHASH(hash2)->ifnone;
+    if (FL_TEST(hash2, HASH_PROC_DEFAULT)) {
+        FL_SET(hash, HASH_PROC_DEFAULT);
+    }
+    else {
+        FL_UNSET(hash, HASH_PROC_DEFAULT);
+    }
+
+    return hash;
+}
+
+/*
+ *  call-seq:
+ *     hsh.length    =>  fixnum
+ *     hsh.size      =>  fixnum
+ *
+ *  Returns the number of key-value pairs in the hash.
+ *
+ *     h = { "d" => 100, "a" => 200, "v" => 300, "e" => 400 }
+ *     h.length        #=> 4
+ *     h.delete("a")   #=> 200
+ *     h.length        #=> 3
+ */
+
+static VALUE
+rb_hash_size(hash)
+    VALUE hash;
+{
+    return INT2FIX(RHASH(hash)->tbl->num_entries);
+}
+
+
+/*
+ *  call-seq:
+ *     hsh.empty?    => true or false
+ *
+ *  Returns <code>true</code> if <i>hsh</i> contains no key-value pairs.
+ *
+ *     {}.empty?   #=> true
+ *
+ */
+
+static VALUE
+rb_hash_empty_p(hash)
+    VALUE hash;
+{
+    if (RHASH(hash)->tbl->num_entries == 0)
+        return Qtrue;
+    return Qfalse;
+}
+
+/* rb_hash_foreach callback: yields the value of each live entry. */
+static int
+each_value_i(key, value)
+    VALUE key, value;
+{
+    if (key == Qundef) return ST_CONTINUE;
+    rb_yield(value);
+    return ST_CONTINUE;
+}
+
+/*
+ *  call-seq:
+ *     hsh.each_value {| value | block } -> hsh
+ *
+ *  Calls <i>block</i> once for each key in <i>hsh</i>, passing the
+ *  value as a parameter.
+ *
+ *     h = { "a" => 100, "b" => 200 }
+ *     h.each_value {|value| puts value }
+ *
+ *  <em>produces:</em>
+ *
+ *     100
+ *     200
+ */
+
+static VALUE
+rb_hash_each_value(hash)
+    VALUE hash;
+{
+    rb_hash_foreach(hash, each_value_i, 0);
+    return hash;
+}
+
+/* rb_hash_foreach callback: yields the key of each live entry. */
+static int
+each_key_i(key, value)
+    VALUE key, value;
+{
+    if (key == Qundef) return ST_CONTINUE;
+    rb_yield(key);
+    return ST_CONTINUE;
+}
+
+/*
+ *  call-seq:
+ *     hsh.each_key {| key | block } -> hsh
+ *
+ *  Calls <i>block</i> once for each key in <i>hsh</i>, passing the key
+ *  as a parameter.
+ *
+ *     h = { "a" => 100, "b" => 200 }
+ *     h.each_key {|key| puts key }
+ *
+ *  <em>produces:</em>
+ *
+ *     a
+ *     b
+ */
+static VALUE
+rb_hash_each_key(hash)
+    VALUE hash;
+{
+    rb_hash_foreach(hash, each_key_i, 0);
+    return hash;
+}
+
+/* rb_hash_foreach callback: yields key and value as two separate values. */
+static int
+each_pair_i(key, value)
+    VALUE key, value;
+{
+    if (key == Qundef) return ST_CONTINUE;
+    rb_yield_values(2, key, value);
+    return ST_CONTINUE;
+}
+
+/*
+ *  call-seq:
+ *     hsh.each_pair {| key_value_array | block } -> hsh
+ *
+ *  Calls <i>block</i> once for each key in <i>hsh</i>, passing the key
+ *  and value as parameters.
+ *
+ *     h = { "a" => 100, "b" => 200 }
+ *     h.each_pair {|key, value| puts "#{key} is #{value}" }
+ *
+ *  <em>produces:</em>
+ *
+ *     a is 100
+ *     b is 200
+ *
+ */
+
+static VALUE
+rb_hash_each_pair(hash)
+    VALUE hash;
+{
+    rb_hash_foreach(hash, each_pair_i, 0);
+    return hash;
+}
+
+/*
+ * rb_hash_foreach callback: yields each live entry as one [key, value]
+ * array.
+ */
+static int
+each_i(key, value)
+    VALUE key, value;
+{
+    if (key == Qundef) return ST_CONTINUE;
+    rb_yield(rb_assoc_new(key, value));
+    return ST_CONTINUE;
+}
+
+/*
+ *  call-seq:
+ *     hsh.each {| key, value | block } -> hsh
+ *
+ *  Calls <i>block</i> once for each key in <i>hsh</i>, passing the key
+ *  and value to the block as a two-element array. Because of the assignment
+ *  semantics of block parameters, these elements will be split out if the
+ *  block has two formal parameters. Also see <code>Hash.each_pair</code>, which
+ *  will be marginally more efficient for blocks with two parameters.
+ *
+ *     h = { "a" => 100, "b" => 200 }
+ *     h.each {|key, value| puts "#{key} is #{value}" }
+ *
+ *  <em>produces:</em>
+ *
+ *     a is 100
+ *     b is 200
+ *
+ */
+
+static VALUE
+rb_hash_each(hash)
+    VALUE hash;
+{
+    rb_hash_foreach(hash, each_i, 0);
+    return hash;
+}
+
+/* rb_hash_foreach callback: appends [key, value] to ary. */
+static int
+to_a_i(key, value, ary)
+    VALUE key, value, ary;
+{
+    if (key == Qundef) return ST_CONTINUE;
+    rb_ary_push(ary, rb_assoc_new(key, value));
+    return ST_CONTINUE;
+}
+
+/*
+ *  call-seq:
+ *     hsh.to_a -> array
+ *
+ *  Converts <i>hsh</i> to a nested array of <code>[</code> <i>key,
+ *  value</i> <code>]</code> arrays.
+ *
+ *     h = { "c" => 300, "a" => 100, "d" => 400, "c" => 300  }
+ *     h.to_a   #=> [["a", 100], ["c", 300], ["d", 400]]
+ */
+
+static VALUE
+rb_hash_to_a(hash)
+    VALUE hash;
+{
+    VALUE ary;
+
+    ary = rb_ary_new();
+    rb_hash_foreach(hash, to_a_i, ary);
+    /* taint carries over from the hash to the resulting array */
+    if (OBJ_TAINTED(hash)) OBJ_TAINT(ary);
+
+    return ary;
+}
+
+/*
+ *  call-seq:
+ *     hsh.sort                    => array
+ *     hsh.sort {| a, b | block }  => array
+ *
+ *  Converts <i>hsh</i> to a nested array of <code>[</code> <i>key,
+ *  value</i> <code>]</code> arrays and sorts it, using
+ *  <code>Array#sort</code>.
+ *
+ *     h = { "a" => 20, "b" => 30, "c" => 10  }
+ *     h.sort                       #=> [["a", 20], ["b", 30], ["c", 10]]
+ *     h.sort {|a,b| a[1]<=>b[1]}   #=> [["c", 10], ["a", 20], ["b", 30]]
+ *
+ */
+
+static VALUE
+rb_hash_sort(hash)
+    VALUE hash;
+{
+    VALUE entries = rb_hash_to_a(hash);
+    rb_ary_sort_bang(entries);
+    return entries;
+}
+
+/*
+ * rb_hash_foreach callback for inspect_hash: appends "key=>value" (both
+ * via rb_inspect) to str, preceded by ", " for every entry after the
+ * first.
+ */
+static int
+inspect_i(key, value, str)
+    VALUE key, value, str;
+{
+    VALUE str2;
+
+    if (key == Qundef) return ST_CONTINUE;
+    /* str starts out as "{", so a length above 1 means an entry precedes */
+    if (RSTRING(str)->len > 1) {
+        rb_str_cat2(str, ", ");
+    }
+    str2 = rb_inspect(key);
+    rb_str_buf_append(str, str2);
+    OBJ_INFECT(str, str2);
+    rb_str_buf_cat2(str, "=>");
+    str2 = rb_inspect(value);
+    rb_str_buf_append(str, str2);
+    OBJ_INFECT(str, str2);
+
+    return ST_CONTINUE;
+}
+
+/*
+ * rb_exec_recursive callback for rb_hash_inspect.  Returns "{...}" when
+ * recur is set, otherwise the "{k=>v, ...}" rendering of hash.
+ */
+static VALUE
+inspect_hash(hash, dummy, recur)
+    VALUE hash, dummy;
+    int recur;
+{
+    VALUE str;
+
+    if (recur) return rb_str_new2("{...}");
+    str = rb_str_buf_new2("{");
+    rb_hash_foreach(hash, inspect_i, str);
+    rb_str_buf_cat2(str, "}");
+    OBJ_INFECT(str, hash);
+
+    return str;
+}
+
+/*
+ * call-seq:
+ *   hsh.inspect  => string
+ *
+ * Return the contents of this hash as a string.
+ */
+
+static VALUE
+rb_hash_inspect(hash)
+    VALUE hash;
+{
+    /* a hash with no table or no entries is rendered as "{}" directly */
+    if (RHASH(hash)->tbl == 0 || RHASH(hash)->tbl->num_entries == 0)
+        return rb_str_new2("{}");
+    return rb_exec_recursive(inspect_hash, hash, 0);
+}
+
+/*
+ * rb_exec_recursive callback for rb_hash_to_s.  Returns "{...}" when
+ * recur is set, otherwise rb_ary_to_s of the hash's [key, value] array.
+ */
+static VALUE
+to_s_hash(hash, dummy, recur)
+    VALUE hash, dummy;
+    int recur;
+{
+    if (recur) return rb_str_new2("{...}");
+    return rb_ary_to_s(rb_hash_to_a(hash));
+}
+
+/*
+ *  call-seq:
+ *     hsh.to_s   => string
+ *
+ *  Converts <i>hsh</i> to a string by converting the hash to an array
+ *  of <code>[</code> <i>key, value</i> <code>]</code> pairs and then
+ *  converting that array to a string using <code>Array#join</code> with
+ *  the default separator.
+ *
+ *     h = { "c" => 300, "a" => 100, "d" => 400, "c" => 300  }
+ *     h.to_s   #=> "a100c300d400"
+ */
+
+static VALUE
+rb_hash_to_s(hash)
+    VALUE hash;
+{
+    return rb_exec_recursive(to_s_hash, hash, 0);
+}
+
+/*
+ *  call-seq:
+ *     hsh.to_hash   => hsh
+ *
+ *  Returns <i>self</i>.
+ */
+
+static VALUE
+rb_hash_to_hash(hash)
+    VALUE hash;
+{
+    return hash;
+}
+
+/* rb_hash_foreach callback: appends the key of each live entry to ary. */
+static int
+keys_i(key, value, ary)
+    VALUE key, value, ary;
+{
+    if (key == Qundef) return ST_CONTINUE;
+    rb_ary_push(ary, key);
+    return ST_CONTINUE;
+}
+
+/*
+ *  call-seq:
+ *     hsh.keys    => array
+ *
+ *  Returns a new array populated with the keys from this hash. See also
+ *  <code>Hash#values</code>.
+ *
+ *     h = { "a" => 100, "b" => 200, "c" => 300, "d" => 400 }
+ *     h.keys   #=> ["a", "b", "c", "d"]
+ *
+ */
+
+static VALUE
+rb_hash_keys(hash)
+    VALUE hash;
+{
+    VALUE ary;
+
+    ary = rb_ary_new();
+    rb_hash_foreach(hash, keys_i, ary);
+
+    return ary;
+}
+
+/* rb_hash_foreach callback: appends the value of each live entry to ary. */
+static int
+values_i(key, value, ary)
+    VALUE key, value, ary;
+{
+    if (key == Qundef) return ST_CONTINUE;
+    rb_ary_push(ary, value);
+    return ST_CONTINUE;
+}
+
+/*
+ *  call-seq:
+ *     hsh.values    => array
+ *
+ *  Returns a new array populated with the values from <i>hsh</i>. See
+ *  also <code>Hash#keys</code>.
+ *
+ *     h = { "a" => 100, "b" => 200, "c" => 300 }
+ *     h.values   #=> [100, 200, 300]
+ *
+ */
+
+static VALUE
+rb_hash_values(hash)
+    VALUE hash;
+{
+    VALUE ary;
+
+    ary = rb_ary_new();
+    rb_hash_foreach(hash, values_i, ary);
+
+    return ary;
+}
+
+/*
+ *  call-seq:
+ *     hsh.has_key?(key)    => true or false
+ *     hsh.include?(key)    => true or false
+ *     hsh.key?(key)        => true or false
+ *     hsh.member?(key)     => true or false
+ *
+ *  Returns <code>true</code> if the given key is present in <i>hsh</i>.
+ *
+ *     h = { "a" => 100, "b" => 200 }
+ *     h.has_key?("a")   #=> true
+ *     h.has_key?("z")   #=> false
+ *
+ */
+
+static VALUE
+rb_hash_has_key(hash, key)
+    VALUE hash;
+    VALUE key;
+{
+    /* only presence matters, so no value pointer is passed */
+    if (st_lookup(RHASH(hash)->tbl, key, 0)) {
+        return Qtrue;
+    }
+    return Qfalse;
+}
+
+/*
+ * rb_hash_foreach callback for rb_hash_has_value: data[1] is the value
+ * looked for; data[0] is set to Qtrue on the first rb_equal match and
+ * iteration stops.
+ */
+static int
+rb_hash_search_value(key, value, data)
+    VALUE key, value, *data;
+{
+    if (key == Qundef) return ST_CONTINUE;
+    if (rb_equal(value, data[1])) {
+        data[0] = Qtrue;
+        return ST_STOP;
+    }
+    return ST_CONTINUE;
+}
+
+/*
+ *  call-seq:
+ *     hsh.has_value?(value)    => true or false
+ *     hsh.value?(value)        => true or false
+ *
+ *  Returns <code>true</code> if the given value is present for some key
+ *  in <i>hsh</i>.
+ *
+ *     h = { "a" => 100, "b" => 200 }
+ *     h.has_value?(100)   #=> true
+ *     h.has_value?(999)   #=> false
+ */
+
+static VALUE
+rb_hash_has_value(hash, val)
+    VALUE hash;
+    VALUE val;
+{
+    VALUE data[2];
+
+    data[0] = Qfalse;   /* result slot, set by rb_hash_search_value */
+    data[1] = val;      /* value to search for */
+    rb_hash_foreach(hash, rb_hash_search_value, (st_data_t)data);
+    return data[0];
+}
+
+/*
+ * State shared between hash_equal and equal_i.  `result' is declared
+ * int but is only ever assigned Qtrue or Qfalse and is returned as a
+ * VALUE by hash_equal.
+ */
+struct equal_data {
+    int result;
+    st_table *tbl;
+};
+
+/*
+ * rb_hash_foreach callback for hash_equal: stops with result = Qfalse
+ * as soon as a key is missing from data->tbl or its value there is not
+ * rb_equal to val1.
+ */
+static int
+equal_i(key, val1, data)
+    VALUE key, val1;
+    struct equal_data *data;
+{
+    VALUE val2;
+
+    if (key == Qundef) return ST_CONTINUE;
+    if (!st_lookup(data->tbl, key, &val2)) {
+        data->result = Qfalse;
+        return ST_STOP;
+    }
+    if (!rb_equal(val1, val2)) {
+        data->result = Qfalse;
+        return ST_STOP;
+    }
+    return ST_CONTINUE;
+}
+
+/*
+ * Shared implementation of hash equality.  Returns Qtrue when both
+ * hashes have the same number of entries and every entry of hash1 has
+ * an rb_equal counterpart in hash2.  When eql is non-zero, the defaults
+ * (ifnone and the HASH_PROC_DEFAULT flag) must match as well.
+ */
+static VALUE
+hash_equal(hash1, hash2, eql)
+    VALUE hash1, hash2;
+    int eql;                    /* compare default value if true */
+{
+    struct equal_data data;
+
+    if (hash1 == hash2) return Qtrue;
+    if (TYPE(hash2) != T_HASH) {
+        if (!rb_respond_to(hash2, rb_intern("to_hash"))) {
+            return Qfalse;
+        }
+        /* hash2 offers to_hash: let it decide, with the operands swapped */
+        return rb_equal(hash2, hash1);
+    }
+    if (RHASH(hash1)->tbl->num_entries != RHASH(hash2)->tbl->num_entries)
+        return Qfalse;
+    if (eql) {
+        if (!(rb_equal(RHASH(hash1)->ifnone, RHASH(hash2)->ifnone) &&
+              FL_TEST(hash1, HASH_PROC_DEFAULT) == FL_TEST(hash2, HASH_PROC_DEFAULT)))
+            return Qfalse;
+    }
+
+    data.tbl = RHASH(hash2)->tbl;
+    data.result = Qtrue;
+    rb_hash_foreach(hash1, equal_i, (st_data_t)&data);
+
+    return data.result;
+}
+
+/*
+ *  call-seq:
+ *     hsh == other_hash    => true or false
+ *
+ *  Equality---Two hashes are equal if they each contain the same number
+ *  of keys and if each key-value pair is equal to (according to
+ *  <code>Object#==</code>) the corresponding elements in the other
+ *  hash.
+ * + * h1 = { "a" => 1, "c" => 2 } + * h2 = { 7 => 35, "c" => 2, "a" => 1 } + * h3 = { "a" => 1, "c" => 2, 7 => 35 } + * h4 = { "a" => 1, "d" => 2, "f" => 35 } + * h1 == h2 #=> false + * h2 == h3 #=> true + * h3 == h4 #=> false + * + */ + +static VALUE +rb_hash_equal(hash1, hash2) + VALUE hash1, hash2; +{ + return hash_equal(hash1, hash2, Qfalse); +} + +static int +rb_hash_invert_i(key, value, hash) + VALUE key, value; + VALUE hash; +{ + if (key == Qundef) return ST_CONTINUE; + rb_hash_aset(hash, value, key); + return ST_CONTINUE; +} + +/* + * call-seq: + * hsh.invert -> aHash + * + * Returns a new hash created by using <i>hsh</i>'s values as keys, and + * the keys as values. + * + * h = { "n" => 100, "m" => 100, "y" => 300, "d" => 200, "a" => 0 } + * h.invert #=> {0=>"a", 100=>"n", 200=>"d", 300=>"y"} + * + */ + +static VALUE +rb_hash_invert(hash) + VALUE hash; +{ + VALUE h = rb_hash_new(); + + rb_hash_foreach(hash, rb_hash_invert_i, h); + return h; +} + +static int +rb_hash_update_i(key, value, hash) + VALUE key, value; + VALUE hash; +{ + if (key == Qundef) return ST_CONTINUE; + rb_hash_aset(hash, key, value); + return ST_CONTINUE; +} + +static int +rb_hash_update_block_i(key, value, hash) + VALUE key, value; + VALUE hash; +{ + if (key == Qundef) return ST_CONTINUE; + if (rb_hash_has_key(hash, key)) { + value = rb_yield_values(3, key, rb_hash_aref(hash, key), value); + } + rb_hash_aset(hash, key, value); + return ST_CONTINUE; +} + +/* + * call-seq: + * hsh.merge!(other_hash) => hsh + * hsh.update(other_hash) => hsh + * hsh.merge!(other_hash){|key, oldval, newval| block} => hsh + * hsh.update(other_hash){|key, oldval, newval| block} => hsh + * + * Adds the contents of <i>other_hash</i> to <i>hsh</i>. 
If no + * block is specified entries with duplicate keys are overwritten + * with the values from <i>other_hash</i>, otherwise the value + * of each duplicate key is detemined by calling the block with + * the key, its value in <i>hsh</i> and its value in <i>other_hash</i>. + * + * h1 = { "a" => 100, "b" => 200 } + * h2 = { "b" => 254, "c" => 300 } + * h1.merge!(h2) #=> {"a"=>100, "b"=>254, "c"=>300} + * h1.merge!(h2) { |key, v1, v2| v1 } + * #=> {"a"=>100, "b"=>200, "c"=>300} + */ + +static VALUE +rb_hash_update(hash1, hash2) + VALUE hash1, hash2; +{ + hash2 = to_hash(hash2); + if (rb_block_given_p()) { + rb_hash_foreach(hash2, rb_hash_update_block_i, hash1); + } + else { + rb_hash_foreach(hash2, rb_hash_update_i, hash1); + } + return hash1; +} + +/* + * call-seq: + * hsh.merge(other_hash) -> a_hash + * hsh.merge(other_hash){|key, oldval, newval| block} -> a_hash + * + * Returns a new hash containing the contents of <i>other_hash</i> and + * the contents of <i>hsh</i>, overwriting entries in <i>hsh</i> with + * duplicate keys with those from <i>other_hash</i>. 
+ * + * h1 = { "a" => 100, "b" => 200 } + * h2 = { "b" => 254, "c" => 300 } + * h1.merge(h2) #=> {"a"=>100, "b"=>254, "c"=>300} + * h1 #=> {"a"=>100, "b"=>200} + * + */ + +static VALUE +rb_hash_merge(hash1, hash2) + VALUE hash1, hash2; +{ + return rb_hash_update(rb_obj_dup(hash1), hash2); +} + +static int path_tainted = -1; + +static char **origenviron; +#ifdef _WIN32 +#define GET_ENVIRON(e) (e = rb_w32_get_environ()) +#define FREE_ENVIRON(e) rb_w32_free_environ(e) +static char **my_environ; +#undef environ +#define environ my_environ +#elif defined(__APPLE__) +#undef environ +#define environ (*_NSGetEnviron()) +#define GET_ENVIRON(e) (e) +#define FREE_ENVIRON(e) +#else +extern char **environ; +#define GET_ENVIRON(e) (e) +#define FREE_ENVIRON(e) +#endif + +static VALUE +env_str_new(ptr, len) + const char *ptr; + long len; +{ + VALUE str = rb_tainted_str_new(ptr, len); + + rb_obj_freeze(str); + return str; +} + +static VALUE +env_str_new2(ptr) + const char *ptr; +{ + if (!ptr) return Qnil; + return env_str_new(ptr, strlen(ptr)); +} + +static VALUE +env_delete(obj, name) + VALUE obj, name; +{ + char *nam, *val; + + rb_secure(4); + SafeStringValue(name); + nam = RSTRING(name)->ptr; + if (strlen(nam) != RSTRING(name)->len) { + rb_raise(rb_eArgError, "bad environment variable name"); + } + val = getenv(nam); + if (val) { + VALUE value = env_str_new2(val); + + ruby_setenv(nam, 0); +#ifdef ENV_IGNORECASE + if (strcasecmp(nam, PATH_ENV) == 0) +#else + if (strcmp(nam, PATH_ENV) == 0) +#endif + { + path_tainted = 0; + } + return value; + } + return Qnil; +} + +static VALUE +env_delete_m(obj, name) + VALUE obj, name; +{ + VALUE val; + + val = env_delete(obj, name); + if (NIL_P(val) && rb_block_given_p()) rb_yield(name); + return val; +} + +static VALUE +rb_f_getenv(obj, name) + VALUE obj, name; +{ + char *nam, *env; + + rb_secure(4); + SafeStringValue(name); + nam = RSTRING(name)->ptr; + if (strlen(nam) != RSTRING(name)->len) { + rb_raise(rb_eArgError, "bad environment 
variable name"); + } + env = getenv(nam); + if (env) { +#ifdef ENV_IGNORECASE + if (strcasecmp(nam, PATH_ENV) == 0 && !rb_env_path_tainted()) +#else + if (strcmp(nam, PATH_ENV) == 0 && !rb_env_path_tainted()) +#endif + { + VALUE str = rb_str_new2(env); + + rb_obj_freeze(str); + return str; + } + return env_str_new2(env); + } + return Qnil; +} + +static VALUE +env_fetch(argc, argv) + int argc; + VALUE *argv; +{ + VALUE key, if_none; + long block_given; + char *nam, *env; + + rb_secure(4); + rb_scan_args(argc, argv, "11", &key, &if_none); + block_given = rb_block_given_p(); + if (block_given && argc == 2) { + rb_warn("block supersedes default value argument"); + } + SafeStringValue(key); + nam = RSTRING(key)->ptr; + if (strlen(nam) != RSTRING(key)->len) { + rb_raise(rb_eArgError, "bad environment variable name"); + } + env = getenv(nam); + if (!env) { + if (block_given) return rb_yield(key); + if (argc == 1) { + rb_raise(rb_eKeyError, "key not found"); + } + return if_none; + } +#ifdef ENV_IGNORECASE + if (strcasecmp(nam, PATH_ENV) == 0 && !rb_env_path_tainted()) +#else + if (strcmp(nam, PATH_ENV) == 0 && !rb_env_path_tainted()) +#endif + return rb_str_new2(env); + return env_str_new2(env); +} + +static void +path_tainted_p(path) + char *path; +{ + path_tainted = rb_path_check(path)?0:1; +} + +int +rb_env_path_tainted() +{ + if (path_tainted < 0) { + path_tainted_p(getenv(PATH_ENV)); + } + return path_tainted; +} + +static int +envix(nam) + const char *nam; +{ + register int i, len = strlen(nam); + char **env; + + env = GET_ENVIRON(environ); + for (i = 0; env[i]; i++) { + if ( +#ifdef ENV_IGNORECASE + strncasecmp(env[i],nam,len) == 0 +#else + memcmp(env[i],nam,len) == 0 +#endif + && env[i][len] == '=') + break; /* memcmp must come first to avoid */ + } /* potential SEGV's */ + FREE_ENVIRON(environ); + return i; +} + +void +ruby_setenv(name, value) + const char *name; + const char *value; +{ +#if defined(_WIN32) + /* The sane way to deal with the environment. 
+ * Has these advantages over putenv() & co.: + * * enables us to store a truly empty value in the + * environment (like in UNIX). + * * we don't have to deal with RTL globals, bugs and leaks. + * * Much faster. + * Why you may want to enable USE_WIN32_RTL_ENV: + * * environ[] and RTL functions will not reflect changes, + * which might be an issue if extensions want to access + * the env. via RTL. This cuts both ways, since RTL will + * not see changes made by extensions that call the Win32 + * functions directly, either. + * GSAR 97-06-07 + * + * REMARK: USE_WIN32_RTL_ENV is already obsoleted since we don't use + * RTL's environ global variable directly yet. + */ + SetEnvironmentVariable(name,value); +#elif defined __CYGWIN__ +#undef setenv +#undef unsetenv + if (value) + setenv(name,value,1); + else + unsetenv(name); +#else /* WIN32 */ + + int i=envix(name); /* where does it go? */ + + if (environ == origenviron) { /* need we copy environment? */ + int j; + int max; + char **tmpenv; + + for (max = i; environ[max]; max++) ; + tmpenv = ALLOC_N(char*, max+2); + for (j=0; j<max; j++) /* copy environment */ + tmpenv[j] = strdup(environ[j]); + tmpenv[max] = 0; + environ = tmpenv; /* tell exec where it is now */ + } + if (!value) { + if (environ != origenviron) { + char **envp = origenviron; + while (*envp && *envp != environ[i]) envp++; + if (!*envp) + free(environ[i]); + } + while (environ[i]) { + environ[i] = environ[i+1]; + i++; + } + return; + } + if (!environ[i]) { /* does not exist yet */ + REALLOC_N(environ, char*, i+2); /* just expand it a bit */ + environ[i+1] = 0; /* make sure it's null terminated */ + } + else { + if (environ[i] != origenviron[i]) + free(environ[i]); + } + environ[i] = ALLOC_N(char, strlen(name) + strlen(value) + 2); +#ifndef MSDOS + sprintf(environ[i],"%s=%s",name,value); /* all that work just for this */ +#else + /* MS-DOS requires environment variable names to be in uppercase */ + /* [Tom Dinger, 27 August 1990: Well, it doesn't _require_ 
it, but + * some utilities and applications may break because they only look + * for upper case strings. (Fixed strupr() bug here.)] + */ + strcpy(environ[i],name); strupr(environ[i]); + sprintf(environ[i] + strlen(name),"=%s", value); +#endif /* MSDOS */ + +#endif /* WIN32 */ +} + +void +ruby_unsetenv(name) + const char *name; +{ + ruby_setenv(name, 0); +} + +static VALUE +env_aset(obj, nm, val) + VALUE obj, nm, val; +{ + char *name, *value; + + if (rb_safe_level() >= 4) { + rb_raise(rb_eSecurityError, "can't change environment variable"); + } + + if (NIL_P(val)) { + env_delete(obj, nm); + return Qnil; + } + + StringValue(nm); + StringValue(val); + name = RSTRING(nm)->ptr; + value = RSTRING(val)->ptr; + if (strlen(name) != RSTRING(nm)->len) + rb_raise(rb_eArgError, "bad environment variable name"); + if (strlen(value) != RSTRING(val)->len) + rb_raise(rb_eArgError, "bad environment variable value"); + + ruby_setenv(name, value); +#ifdef ENV_IGNORECASE + if (strcasecmp(name, PATH_ENV) == 0) { +#else + if (strcmp(name, PATH_ENV) == 0) { +#endif + if (OBJ_TAINTED(val)) { + /* already tainted, no check */ + path_tainted = 1; + return val; + } + else { + path_tainted_p(value); + } + } + return val; +} + +static VALUE +env_keys() +{ + char **env; + VALUE ary; + + rb_secure(4); + ary = rb_ary_new(); + env = GET_ENVIRON(environ); + while (*env) { + char *s = strchr(*env, '='); + if (s) { + rb_ary_push(ary, env_str_new(*env, s-*env)); + } + env++; + } + FREE_ENVIRON(environ); + return ary; +} + +static VALUE +env_each_key(ehash) + VALUE ehash; +{ + VALUE keys; + long i; + + rb_secure(4); + keys = env_keys(); + for (i=0; i<RARRAY(keys)->len; i++) { + rb_yield(RARRAY(keys)->ptr[i]); + } + return ehash; +} + +static VALUE +env_values() +{ + VALUE ary; + char **env; + + rb_secure(4); + ary = rb_ary_new(); + env = GET_ENVIRON(environ); + while (*env) { + char *s = strchr(*env, '='); + if (s) { + rb_ary_push(ary, env_str_new2(s+1)); + } + env++; + } + FREE_ENVIRON(environ); + 
return ary; +} + +static VALUE +env_each_value(ehash) + VALUE ehash; +{ + VALUE values = env_values(); + long i; + + rb_secure(4); + values = env_values(); + for (i=0; i<RARRAY(values)->len; i++) { + rb_yield(RARRAY(values)->ptr[i]); + } + return ehash; +} + +static VALUE +env_each_i(ehash, values) + VALUE ehash; + int values; +{ + char **env; + VALUE ary; + long i; + + rb_secure(4); + ary = rb_ary_new(); + env = GET_ENVIRON(environ); + while (*env) { + char *s = strchr(*env, '='); + if (s) { + rb_ary_push(ary, env_str_new(*env, s-*env)); + rb_ary_push(ary, env_str_new2(s+1)); + } + env++; + } + FREE_ENVIRON(environ); + + for (i=0; i<RARRAY(ary)->len; i+=2) { + if (values) { + rb_yield_values(2, RARRAY(ary)->ptr[i], RARRAY(ary)->ptr[i+1]); + } + else { + rb_yield(rb_assoc_new(RARRAY(ary)->ptr[i], RARRAY(ary)->ptr[i+1])); + } + } + return ehash; +} + +static VALUE +env_each(ehash) + VALUE ehash; +{ + return env_each_i(ehash, Qfalse); +} + +static VALUE +env_each_pair(ehash) + VALUE ehash; +{ + return env_each_i(ehash, Qtrue); +} + +static VALUE +env_reject_bang() +{ + volatile VALUE keys; + long i; + int del = 0; + + rb_secure(4); + keys = env_keys(); + for (i=0; i<RARRAY(keys)->len; i++) { + VALUE val = rb_f_getenv(Qnil, RARRAY(keys)->ptr[i]); + if (!NIL_P(val)) { + if (RTEST(rb_yield_values(2, RARRAY(keys)->ptr[i], val))) { + FL_UNSET(RARRAY(keys)->ptr[i], FL_TAINT); + env_delete(Qnil, RARRAY(keys)->ptr[i]); + del++; + } + } + } + if (del == 0) return Qnil; + return envtbl; +} + +static VALUE +env_delete_if() +{ + env_reject_bang(); + return envtbl; +} + +static VALUE +env_values_at(argc, argv) + int argc; + VALUE *argv; +{ + VALUE result; + long i; + + rb_secure(4); + result = rb_ary_new(); + for (i=0; i<argc; i++) { + rb_ary_push(result, rb_f_getenv(Qnil, argv[i])); + } + return result; +} + +static VALUE +env_select() +{ + VALUE result; + char **env; + + rb_secure(4); + result = rb_ary_new(); + env = GET_ENVIRON(environ); + while (*env) { + char *s = 
strchr(*env, '='); + if (s) { + VALUE k = env_str_new(*env, s-*env); + VALUE v = env_str_new2(s+1); + if (RTEST(rb_yield_values(2, k, v))) { + rb_ary_push(result, rb_assoc_new(k, v)); + } + } + env++; + } + FREE_ENVIRON(environ); + + return result; +} + +static VALUE +env_clear() +{ + volatile VALUE keys; + long i; + + rb_secure(4); + keys = env_keys(); + for (i=0; i<RARRAY(keys)->len; i++) { + VALUE val = rb_f_getenv(Qnil, RARRAY(keys)->ptr[i]); + if (!NIL_P(val)) { + env_delete(Qnil, RARRAY(keys)->ptr[i]); + } + } + return envtbl; +} + +static VALUE +env_to_s() +{ + return rb_str_new2("ENV"); +} + +static VALUE +env_inspect() +{ + char **env; + VALUE str, i; + + rb_secure(4); + str = rb_str_buf_new2("{"); + env = GET_ENVIRON(environ); + while (*env) { + char *s = strchr(*env, '='); + + if (env != environ) { + rb_str_buf_cat2(str, ", "); + } + if (s) { + rb_str_buf_cat2(str, "\""); + rb_str_buf_cat(str, *env, s-*env); + rb_str_buf_cat2(str, "\"=>"); + i = rb_inspect(rb_str_new2(s+1)); + rb_str_buf_append(str, i); + } + env++; + } + FREE_ENVIRON(environ); + rb_str_buf_cat2(str, "}"); + OBJ_TAINT(str); + + return str; +} + +static VALUE +env_to_a() +{ + char **env; + VALUE ary; + + rb_secure(4); + ary = rb_ary_new(); + env = GET_ENVIRON(environ); + while (*env) { + char *s = strchr(*env, '='); + if (s) { + rb_ary_push(ary, rb_assoc_new(env_str_new(*env, s-*env), + env_str_new2(s+1))); + } + env++; + } + FREE_ENVIRON(environ); + return ary; +} + +static VALUE +env_none() +{ + return Qnil; +} + +static VALUE +env_size() +{ + int i; + char **env; + + rb_secure(4); + env = GET_ENVIRON(environ); + for(i=0; env[i]; i++) + ; + FREE_ENVIRON(environ); + return INT2FIX(i); +} + +static VALUE +env_empty_p() +{ + char **env; + + rb_secure(4); + env = GET_ENVIRON(environ); + if (env[0] == 0) { + FREE_ENVIRON(environ); + return Qtrue; + } + FREE_ENVIRON(environ); + return Qfalse; +} + +static VALUE +env_has_key(env, key) + VALUE env, key; +{ + char *s; + + rb_secure(4); + s = 
StringValuePtr(key); + if (strlen(s) != RSTRING(key)->len) + rb_raise(rb_eArgError, "bad environment variable name"); + if (getenv(s)) return Qtrue; + return Qfalse; +} + +static VALUE +env_has_value(dmy, value) + VALUE dmy, value; +{ + char **env; + + rb_secure(4); + if (TYPE(value) != T_STRING) return Qfalse; + env = GET_ENVIRON(environ); + while (*env) { + char *s = strchr(*env, '='); + if (s++) { + long len = strlen(s); + if (RSTRING(value)->len == len && strncmp(s, RSTRING(value)->ptr, len) == 0) { + FREE_ENVIRON(environ); + return Qtrue; + } + } + env++; + } + FREE_ENVIRON(environ); + return Qfalse; +} + +static VALUE +env_key(dmy, value) + VALUE dmy, value; +{ + char **env; + VALUE str; + + rb_secure(4); + StringValue(value); + env = GET_ENVIRON(environ); + while (*env) { + char *s = strchr(*env, '='); + if (s++) { + long len = strlen(s); + if (RSTRING(value)->len == len && strncmp(s, RSTRING(value)->ptr, len) == 0) { + str = env_str_new(*env, s-*env-1); + FREE_ENVIRON(environ); + return str; + } + } + env++; + } + FREE_ENVIRON(environ); + return Qnil; +} + +static VALUE +env_index(dmy, value) + VALUE dmy, value; +{ + rb_warn("ENV.index is deprecated; use ENV.key"); + return env_key(dmy, value); +} + +static VALUE +env_to_hash() +{ + char **env; + VALUE hash; + + rb_secure(4); + hash = rb_hash_new(); + env = GET_ENVIRON(environ); + while (*env) { + char *s = strchr(*env, '='); + if (s) { + rb_hash_aset(hash, env_str_new(*env, s-*env), + env_str_new2(s+1)); + } + env++; + } + FREE_ENVIRON(environ); + return hash; +} + +static VALUE +env_reject() +{ + return rb_hash_delete_if(env_to_hash()); +} + +static VALUE +env_shift() +{ + char **env; + + rb_secure(4); + env = GET_ENVIRON(environ); + if (*env) { + char *s = strchr(*env, '='); + if (s) { + VALUE key = env_str_new(*env, s-*env); + VALUE val = env_str_new2(getenv(RSTRING(key)->ptr)); + env_delete(Qnil, key); + return rb_assoc_new(key, val); + } + } + FREE_ENVIRON(environ); + return Qnil; +} + +static VALUE 
+env_invert() +{ + return rb_hash_invert(env_to_hash()); +} + +static int +env_replace_i(key, val, keys) + VALUE key, val, keys; +{ + if (key != Qundef) { + env_aset(Qnil, key, val); + if (rb_ary_includes(keys, key)) { + rb_ary_delete(keys, key); + } + } + return ST_CONTINUE; +} + +static VALUE +env_replace(env, hash) + VALUE env, hash; +{ + volatile VALUE keys; + long i; + + rb_secure(4); + keys = env_keys(); + if (env == hash) return env; + hash = to_hash(hash); + rb_hash_foreach(hash, env_replace_i, keys); + + for (i=0; i<RARRAY(keys)->len; i++) { + env_delete(env, RARRAY(keys)->ptr[i]); + } + return env; +} + +static int +env_update_i(key, val) + VALUE key, val; +{ + if (key != Qundef) { + if (rb_block_given_p()) { + val = rb_yield_values(3, key, rb_f_getenv(Qnil, key), val); + } + env_aset(Qnil, key, val); + } + return ST_CONTINUE; +} + +static VALUE +env_update(env, hash) + VALUE env, hash; +{ + rb_secure(4); + if (env == hash) return env; + hash = to_hash(hash); + rb_hash_foreach(hash, env_update_i, 0); + return env; +} + +/* + * A <code>Hash</code> is a collection of key-value pairs. It is + * similar to an <code>Array</code>, except that indexing is done via + * arbitrary keys of any object type, not an integer index. The order + * in which you traverse a hash by either key or value may seem + * arbitrary, and will generally not be in the insertion order. + * + * Hashes have a <em>default value</em> that is returned when accessing + * keys that do not exist in the hash. By default, that value is + * <code>nil</code>. 
+ * + */ + +void +Init_Hash() +{ + id_hash = rb_intern("hash"); + id_call = rb_intern("call"); + id_default = rb_intern("default"); + + rb_cHash = rb_define_class("Hash", rb_cObject); + + rb_include_module(rb_cHash, rb_mEnumerable); + + rb_define_alloc_func(rb_cHash, hash_alloc); + rb_define_singleton_method(rb_cHash, "[]", rb_hash_s_create, -1); + rb_define_method(rb_cHash,"initialize", rb_hash_initialize, -1); + rb_define_method(rb_cHash,"initialize_copy", rb_hash_replace, 1); + rb_define_method(rb_cHash,"rehash", rb_hash_rehash, 0); + + rb_define_method(rb_cHash,"to_hash", rb_hash_to_hash, 0); + rb_define_method(rb_cHash,"to_a", rb_hash_to_a, 0); + rb_define_method(rb_cHash,"to_s", rb_hash_to_s, 0); + rb_define_method(rb_cHash,"inspect", rb_hash_inspect, 0); + + rb_define_method(rb_cHash,"==", rb_hash_equal, 1); + rb_define_method(rb_cHash,"[]", rb_hash_aref, 1); + rb_define_method(rb_cHash,"fetch", rb_hash_fetch, -1); + rb_define_method(rb_cHash,"[]=", rb_hash_aset, 2); + rb_define_method(rb_cHash,"store", rb_hash_aset, 2); + rb_define_method(rb_cHash,"default", rb_hash_default, -1); + rb_define_method(rb_cHash,"default=", rb_hash_set_default, 1); + rb_define_method(rb_cHash,"default_proc", rb_hash_default_proc, 0); + rb_define_method(rb_cHash,"key", rb_hash_key, 1); + rb_define_method(rb_cHash,"index", rb_hash_index, 1); + rb_define_method(rb_cHash,"size", rb_hash_size, 0); + rb_define_method(rb_cHash,"length", rb_hash_size, 0); + rb_define_method(rb_cHash,"empty?", rb_hash_empty_p, 0); + + rb_define_method(rb_cHash,"each", rb_hash_each, 0); + rb_define_method(rb_cHash,"each_value", rb_hash_each_value, 0); + rb_define_method(rb_cHash,"each_key", rb_hash_each_key, 0); + rb_define_method(rb_cHash,"each_pair", rb_hash_each_pair, 0); + rb_define_method(rb_cHash,"sort", rb_hash_sort, 0); + + rb_define_method(rb_cHash,"keys", rb_hash_keys, 0); + rb_define_method(rb_cHash,"values", rb_hash_values, 0); + rb_define_method(rb_cHash,"values_at", rb_hash_values_at, -1); + 
+ rb_define_method(rb_cHash,"shift", rb_hash_shift, 0); + rb_define_method(rb_cHash,"delete", rb_hash_delete, 1); + rb_define_method(rb_cHash,"delete_if", rb_hash_delete_if, 0); + rb_define_method(rb_cHash,"select", rb_hash_select, 0); + rb_define_method(rb_cHash,"reject", rb_hash_reject, 0); + rb_define_method(rb_cHash,"reject!", rb_hash_reject_bang, 0); + rb_define_method(rb_cHash,"clear", rb_hash_clear, 0); + rb_define_method(rb_cHash,"invert", rb_hash_invert, 0); + rb_define_method(rb_cHash,"update", rb_hash_update, 1); + rb_define_method(rb_cHash,"replace", rb_hash_replace, 1); + rb_define_method(rb_cHash,"merge!", rb_hash_update, 1); + rb_define_method(rb_cHash,"merge", rb_hash_merge, 1); + + rb_define_method(rb_cHash,"include?", rb_hash_has_key, 1); + rb_define_method(rb_cHash,"member?", rb_hash_has_key, 1); + rb_define_method(rb_cHash,"has_key?", rb_hash_has_key, 1); + rb_define_method(rb_cHash,"has_value?", rb_hash_has_value, 1); + rb_define_method(rb_cHash,"key?", rb_hash_has_key, 1); + rb_define_method(rb_cHash,"value?", rb_hash_has_value, 1); + +#ifndef __MACOS__ /* environment variables nothing on MacOS. 
*/ + origenviron = environ; + envtbl = rb_obj_alloc(rb_cObject); + rb_extend_object(envtbl, rb_mEnumerable); + + rb_define_singleton_method(envtbl,"[]", rb_f_getenv, 1); + rb_define_singleton_method(envtbl,"fetch", env_fetch, -1); + rb_define_singleton_method(envtbl,"[]=", env_aset, 2); + rb_define_singleton_method(envtbl,"store", env_aset, 2); + rb_define_singleton_method(envtbl,"each", env_each, 0); + rb_define_singleton_method(envtbl,"each_pair", env_each_pair, 0); + rb_define_singleton_method(envtbl,"each_key", env_each_key, 0); + rb_define_singleton_method(envtbl,"each_value", env_each_value, 0); + rb_define_singleton_method(envtbl,"delete", env_delete_m, 1); + rb_define_singleton_method(envtbl,"delete_if", env_delete_if, 0); + rb_define_singleton_method(envtbl,"clear", env_clear, 0); + rb_define_singleton_method(envtbl,"reject", env_reject, 0); + rb_define_singleton_method(envtbl,"reject!", env_reject_bang, 0); + rb_define_singleton_method(envtbl,"select", env_select, 0); + rb_define_singleton_method(envtbl,"shift", env_shift, 0); + rb_define_singleton_method(envtbl,"invert", env_invert, 0); + rb_define_singleton_method(envtbl,"replace", env_replace, 1); + rb_define_singleton_method(envtbl,"update", env_update, 1); + rb_define_singleton_method(envtbl,"inspect", env_inspect, 0); + rb_define_singleton_method(envtbl,"rehash", env_none, 0); + rb_define_singleton_method(envtbl,"to_a", env_to_a, 0); + rb_define_singleton_method(envtbl,"to_s", env_to_s, 0); + rb_define_singleton_method(envtbl,"key", env_key, 1); + rb_define_singleton_method(envtbl,"index", env_index, 1); + rb_define_singleton_method(envtbl,"size", env_size, 0); + rb_define_singleton_method(envtbl,"length", env_size, 0); + rb_define_singleton_method(envtbl,"empty?", env_empty_p, 0); + rb_define_singleton_method(envtbl,"keys", env_keys, 0); + rb_define_singleton_method(envtbl,"values", env_values, 0); + rb_define_singleton_method(envtbl,"values_at", env_values_at, -1); + 
rb_define_singleton_method(envtbl,"include?", env_has_key, 1); + rb_define_singleton_method(envtbl,"member?", env_has_key, 1); + rb_define_singleton_method(envtbl,"has_key?", env_has_key, 1); + rb_define_singleton_method(envtbl,"has_value?", env_has_value, 1); + rb_define_singleton_method(envtbl,"key?", env_has_key, 1); + rb_define_singleton_method(envtbl,"value?", env_has_value, 1); + rb_define_singleton_method(envtbl,"to_hash", env_to_hash, 0); + + rb_define_global_const("ENV", envtbl); +#else /* __MACOS__ */ + envtbl = rb_hash_s_new(0, NULL, rb_cHash); + rb_define_global_const("ENV", envtbl); +#endif /* ifndef __MACOS__ environment variables nothing on MacOS. */ +} +/********************************************************************** + + inits.c - + + $Author: dave $ + $Date: 2003/12/19 03:58:57 $ + created at: Tue Dec 28 16:01:58 JST 1993 + + Copyright (C) 1993-2003 Yukihiro Matsumoto + +**********************************************************************/ + +#include "ruby.h" + +void Init_Array _((void)); +void Init_Bignum _((void)); +void Init_Binding _((void)); +void Init_Comparable _((void)); +void Init_Dir _((void)); +void Init_Enumerable _((void)); +void Init_Exception _((void)); +void Init_syserr _((void)); +void Init_eval _((void)); +void Init_load _((void)); +void Init_Proc _((void)); +void Init_Thread _((void)); +void Init_File _((void)); +void Init_GC _((void)); +void Init_Hash _((void)); +void Init_IO _((void)); +void Init_Math _((void)); +void Init_marshal _((void)); +void Init_Numeric _((void)); +void Init_Object _((void)); +void Init_pack _((void)); +void Init_Precision _((void)); +void Init_sym _((void)); +void Init_process _((void)); +void Init_Random _((void)); +void Init_Range _((void)); +void Init_Regexp _((void)); +void Init_signal _((void)); +void Init_String _((void)); +void Init_Struct _((void)); +void Init_Time _((void)); +void Init_var_tables _((void)); +void Init_version _((void)); + +void +rb_call_inits() +{ + Init_sym(); + 
Init_var_tables(); + Init_Object(); + Init_Comparable(); + Init_Enumerable(); + Init_Precision(); + Init_eval(); + Init_String(); + Init_Exception(); + Init_Thread(); + Init_Numeric(); + Init_Bignum(); + Init_syserr(); + Init_Array(); + Init_Hash(); + Init_Struct(); + Init_Regexp(); + Init_pack(); + Init_Range(); + Init_IO(); + Init_Dir(); + Init_Time(); + Init_Random(); + Init_signal(); + Init_process(); + Init_load(); + Init_Proc(); + Init_Binding(); + Init_Math(); + Init_GC(); + Init_marshal(); + Init_version(); +} +/********************************************************************** + + io.c - + + $Author: matz $ + $Date: 2005/03/07 02:05:07 $ + created at: Fri Oct 15 18:08:59 JST 1993 + + Copyright (C) 1993-2003 Yukihiro Matsumoto + Copyright (C) 2000 Network Applied Communication Laboratory, Inc. + Copyright (C) 2000 Information-technology Promotion Agency, Japan + +**********************************************************************/ + +#include "ruby.h" +#include "rubyio.h" +#include "rubysig.h" +#include <ctype.h> +#include <errno.h> + +#include <sys/types.h> +#if !defined(_WIN32) && !defined(__DJGPP__) +# if defined(__BEOS__) +# include <net/socket.h> +# else +# include <sys/socket.h> +# endif +#endif + +#if defined(MSDOS) || defined(__BOW__) || defined(__CYGWIN__) || defined(_WIN32) || defined(__human68k__) || defined(__EMX__) || defined(__BEOS__) +# define NO_SAFE_RENAME +#endif + +#if defined(MSDOS) || defined(__CYGWIN__) || defined(_WIN32) +# define NO_LONG_FNAME +#endif + +#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(sun) || defined(_nec_ews) +# define USE_SETVBUF +#endif + +#ifdef __QNXNTO__ +#include "unix.h" +#endif + +#include <sys/types.h> +#if !defined(DJGPP) && !defined(_WIN32) && !defined(__human68k__) +#include <sys/ioctl.h> +#endif +#if defined(HAVE_FCNTL_H) || defined(_WIN32) +#include <fcntl.h> +#elif defined(HAVE_SYS_FCNTL_H) +#include <sys/fcntl.h> +#endif + +#if !HAVE_OFF_T && !defined(off_t) 
+# define off_t long +#endif + +#include <sys/stat.h> + +/* EMX has sys/param.h, but.. */ +#if defined(HAVE_SYS_PARAM_H) && !(defined(__EMX__) || defined(__HIUX_MPP__)) +# include <sys/param.h> +#endif + +#if !defined NOFILE +# define NOFILE 64 +#endif + +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif + +extern void Init_File _((void)); + +#ifdef __BEOS__ +# ifndef NOFILE +# define NOFILE (OPEN_MAX) +# endif +#include <net/socket.h> +#endif + +#include "util.h" + +#ifndef O_ACCMODE +#define O_ACCMODE (O_RDONLY | O_WRONLY | O_RDWR) +#endif + +#if SIZEOF_OFF_T > SIZEOF_LONG && !defined(HAVE_LONG_LONG) +# error off_t is bigger than long, but you have no long long... +#endif + +VALUE rb_cIO; +VALUE rb_eEOFError; +VALUE rb_eIOError; + +VALUE rb_stdin, rb_stdout, rb_stderr; +VALUE rb_deferr; /* rescue VIM plugin */ +static VALUE orig_stdout, orig_stderr; + +VALUE rb_output_fs; +VALUE rb_rs; +VALUE rb_output_rs; +VALUE rb_default_rs; + +static VALUE argf; + +static ID id_write, id_read, id_getc, id_flush; + +extern char *ruby_inplace_mode; + +struct timeval rb_time_interval _((VALUE)); + +static VALUE filename, current_file; +static int gets_lineno; +static int init_p = 0, next_p = 0; +static VALUE lineno = INT2FIX(0); + +#ifdef _STDIO_USES_IOSTREAM /* GNU libc */ +# ifdef _IO_fpos_t +# define STDIO_READ_DATA_PENDING(fp) ((fp)->_IO_read_ptr != (fp)->_IO_read_end) +# else +# define STDIO_READ_DATA_PENDING(fp) ((fp)->_gptr < (fp)->_egptr) +# endif +#elif defined(FILE_COUNT) +# define STDIO_READ_DATA_PENDING(fp) ((fp)->FILE_COUNT > 0) +#elif defined(FILE_READEND) +# define STDIO_READ_DATA_PENDING(fp) ((fp)->FILE_READPTR < (fp)->FILE_READEND) +#elif defined(__BEOS__) +# define STDIO_READ_DATA_PENDING(fp) (fp->_state._eof == 0) +#elif defined(__VMS) +# define STDIO_READ_DATA_PENDING(fp) (((unsigned int)(*(fp))->_cnt) > 0) +#else +# define STDIO_READ_DATA_PENDING(fp) (!feof(fp)) +#endif + +#if defined(__VMS) +#define fopen(file_spec, mode) fopen(file_spec, mode, "rfm=stmlf") 
+#define open(file_spec, flags, mode) open(file_spec, flags, mode, "rfm=stmlf") +#endif + +#define READ_DATA_PENDING(fptr) ((fptr)->rbuf_len) +#define READ_DATA_PENDING_COUNT(fptr) ((fptr)->rbuf_len) +#define READ_DATA_PENDING_PTR(fptr) ((fptr)->rbuf+(fptr)->rbuf_off) +#define READ_DATA_BUFFERED(fptr) READ_DATA_PENDING(fptr) + +#define READ_CHECK(fptr) do {\ + if (!READ_DATA_PENDING(fptr)) {\ + rb_thread_wait_fd((fptr)->fd);\ + rb_io_check_closed(fptr);\ + }\ +} while(0) + +#if defined(_WIN32) +#define is_socket(fd, path) rb_w32_is_socket(fd) +#elif defined(__DJGPP__) +#define is_socket(fd, path) 0 +#define shutdown(a,b) 0 +#else +static int +is_socket(fd, path) + int fd; + const char *path; +{ + struct stat sbuf; + if (fstat(fd, &sbuf) < 0) + rb_sys_fail(path); + return S_ISSOCK(sbuf.st_mode); +} +#endif + +void +rb_eof_error() +{ + rb_raise(rb_eEOFError, "end of file reached"); +} + +VALUE +rb_io_taint_check(io) + VALUE io; +{ + if (!OBJ_TAINTED(io) && rb_safe_level() >= 4) + rb_raise(rb_eSecurityError, "Insecure: operation on untainted IO"); + rb_check_frozen(io); + return io; +} + +void +rb_io_check_initialized(fptr) + OpenFile *fptr; +{ + if (!fptr) { + rb_raise(rb_eIOError, "uninitialized stream"); + } +} + +void +rb_io_check_closed(fptr) + OpenFile *fptr; +{ + rb_io_check_initialized(fptr); + if (fptr->fd < 0) { + rb_raise(rb_eIOError, "closed stream"); + } +} + +static int io_fflush _((OpenFile *)); + +static VALUE +rb_io_get_io(io) + VALUE io; +{ + return rb_convert_type(io, T_FILE, "IO", "to_io"); +} + +static VALUE +rb_io_check_io(io) + VALUE io; +{ + return rb_check_convert_type(io, T_FILE, "IO", "to_io"); +} + +static void +io_unread(OpenFile *fptr) +{ + off_t r; + rb_io_check_closed(fptr); + if (fptr->rbuf_len == 0 || fptr->mode & FMODE_DUPLEX) + return; + /* xxx: target position may be negative if buffer is filled by ungetc */ + r = lseek(fptr->fd, -fptr->rbuf_len, SEEK_CUR); + if (r < 0) { + if (errno == ESPIPE) + fptr->mode |= FMODE_DUPLEX; + 
return; + } + fptr->rbuf_off = 0; + fptr->rbuf_len = 0; + return; +} + +static int +io_ungetc(int c, OpenFile *fptr) +{ + if (fptr->rbuf == NULL) { + fptr->rbuf_off = 0; + fptr->rbuf_len = 0; + fptr->rbuf_capa = 8192; + fptr->rbuf = ALLOC_N(char, fptr->rbuf_capa); + } + if (c < 0 || fptr->rbuf_len == fptr->rbuf_capa) { + return -1; + } + if (fptr->rbuf_off == 0) { + if (fptr->rbuf_len) + MEMMOVE(fptr->rbuf+1, fptr->rbuf, char, fptr->rbuf_len); + fptr->rbuf_off = 1; + } + fptr->rbuf_off--; + fptr->rbuf_len++; + fptr->rbuf[fptr->rbuf_off] = c; + return c; +} + +static OpenFile * +flush_before_seek(fptr) + OpenFile *fptr; +{ + io_fflush(fptr); + io_unread(fptr); + return fptr; +} + +#define io_seek(fptr, ofs, whence) lseek(flush_before_seek(fptr)->fd, ofs, whence) +#define io_tell(fptr) lseek(flush_before_seek(fptr)->fd, 0, SEEK_CUR) + +#ifndef SEEK_CUR +# define SEEK_SET 0 +# define SEEK_CUR 1 +# define SEEK_END 2 +#endif + +#define FMODE_SYNCWRITE (FMODE_SYNC|FMODE_WRITABLE) + +void +rb_io_check_readable(fptr) + OpenFile *fptr; +{ + rb_io_check_closed(fptr); + if (!(fptr->mode & FMODE_READABLE)) { + rb_raise(rb_eIOError, "not opened for reading"); + } + if (fptr->wbuf_len) { + io_fflush(fptr); + } +} + +void +rb_io_check_writable(fptr) + OpenFile *fptr; +{ + rb_io_check_closed(fptr); + if (!(fptr->mode & FMODE_WRITABLE)) { + rb_raise(rb_eIOError, "not opened for writing"); + } + if (fptr->rbuf_len) { + io_unread(fptr); + } +} + +int +rb_read_pending(fp) + FILE *fp; +{ + return STDIO_READ_DATA_PENDING(fp); +} + +int +rb_io_read_pending(OpenFile *fptr) +{ + return READ_DATA_PENDING(fptr); +} + +void +rb_read_check(fp) + FILE *fp; +{ + if (!STDIO_READ_DATA_PENDING(fp)) { + rb_thread_wait_fd(fileno(fp)); + } +} + +void +rb_io_read_check(OpenFile *fptr) +{ + if (!READ_DATA_PENDING(fptr)) { + rb_thread_wait_fd(fptr->fd); + } + return; +} + +static int +ruby_dup(orig) + int orig; +{ + int fd; + + fd = dup(orig); + if (fd < 0) { + if (errno == EMFILE || errno == ENFILE) { + 
rb_gc(); + fd = dup(orig); + } + if (fd < 0) { + rb_sys_fail(0); + } + } + return fd; +} + +static VALUE io_alloc _((VALUE)); +static VALUE +io_alloc(klass) + VALUE klass; +{ + NEWOBJ(io, struct RFile); + OBJSETUP(io, klass, T_FILE); + + io->fptr = 0; + + return (VALUE)io; +} + +static int +io_fflush(fptr) + OpenFile *fptr; +{ + int r; + int wbuf_off, wbuf_len; + + rb_io_check_closed(fptr); + if (fptr->wbuf_len == 0) + return 0; + if (!rb_thread_fd_writable(fptr->fd)) { + rb_io_check_closed(fptr); + } + retry: + if (fptr->wbuf_len == 0) + return 0; + wbuf_off = fptr->wbuf_off; + wbuf_len = fptr->wbuf_len; + TRAP_BEG; + r = write(fptr->fd, fptr->wbuf+fptr->wbuf_off, fptr->wbuf_len); + TRAP_END; /* xxx: signal handler may modify wbuf */ + if (r == fptr->wbuf_len) { + fptr->wbuf_off = 0; + fptr->wbuf_len = 0; + return 0; + } + if (0 <= r) { + fptr->wbuf_off = (wbuf_off += r); + fptr->wbuf_len = (wbuf_len -= r); + errno = EAGAIN; + } + if (rb_io_wait_writable(fptr->fd)) { + rb_io_check_closed(fptr); + goto retry; + } + return -1; +} + +int +rb_io_wait_readable(f) + int f; +{ + fd_set rfds; + + switch (errno) { + case EINTR: +#if defined(ERESTART) + case ERESTART: +#endif + rb_thread_wait_fd(f); + return Qtrue; + + case EAGAIN: +#if defined(EWOULDBLOCK) && EWOULDBLOCK != EAGAIN + case EWOULDBLOCK: +#endif + FD_ZERO(&rfds); + FD_SET(f, &rfds); + rb_thread_select(f + 1, &rfds, NULL, NULL, NULL); + return Qtrue; + + default: + return Qfalse; + } +} + +int +rb_io_wait_writable(f) + int f; +{ + fd_set wfds; + + switch (errno) { + case EINTR: +#if defined(ERESTART) + case ERESTART: +#endif + rb_thread_fd_writable(f); + return Qtrue; + + case EAGAIN: +#if defined(EWOULDBLOCK) && EWOULDBLOCK != EAGAIN + case EWOULDBLOCK: +#endif + FD_ZERO(&wfds); + FD_SET(f, &wfds); + rb_thread_select(f + 1, NULL, &wfds, NULL, NULL); + return Qtrue; + + default: + return Qfalse; + } +} + +/* writing functions */ +static long +io_fwrite(str, fptr) + VALUE str; + OpenFile *fptr; +{ + long len, n, 
r, offset = 0; + + len = RSTRING(str)->len; + if ((n = len) <= 0) return n; + if (fptr->wbuf == NULL && !(fptr->mode & FMODE_SYNC)) { + fptr->wbuf_off = 0; + fptr->wbuf_len = 0; + fptr->wbuf_capa = 8192; + fptr->wbuf = ALLOC_N(char, fptr->wbuf_capa); + } + if ((fptr->mode & FMODE_SYNC) || + (fptr->wbuf && fptr->wbuf_capa <= fptr->wbuf_len + len) || + ((fptr->mode & FMODE_TTY) && memchr(RSTRING(str)->ptr+offset, '\n', len))) { + /* xxx: use writev to avoid double write if available */ + if (fptr->wbuf_len && fptr->wbuf_len+len <= fptr->wbuf_capa) { + if (fptr->wbuf_capa < fptr->wbuf_off+fptr->wbuf_len+len) { + MEMMOVE(fptr->wbuf, fptr->wbuf+fptr->wbuf_off, char, fptr->wbuf_len); + fptr->wbuf_off = 0; + } + MEMMOVE(fptr->wbuf+fptr->wbuf_off+fptr->wbuf_len, RSTRING(str)->ptr+offset, char, len); + fptr->wbuf_len += len; + n = 0; + } + if (io_fflush(fptr) < 0) + return -1L; + if (n == 0) + return len; + /* avoid context switch between "a" and "\n" in STDERR.puts "a". + [ruby-dev:25080] */ + if (fptr->stdio_file != stderr && !rb_thread_fd_writable(fptr->fd)) { + rb_io_check_closed(fptr); + } + retry: + TRAP_BEG; + r = write(fptr->fd, RSTRING(str)->ptr+offset, n); + TRAP_END; /* xxx: signal handler may modify given string. 
*/ + if (r == n) return len; + if (0 <= r) { + offset += r; + n -= r; + errno = EAGAIN; + } + if (rb_io_wait_writable(fptr->fd)) { + rb_io_check_closed(fptr); + if (offset < RSTRING(str)->len) + goto retry; + } + return -1L; + } + + if (fptr->wbuf_off) { + if (fptr->wbuf_len) + MEMMOVE(fptr->wbuf, fptr->wbuf+fptr->wbuf_off, char, fptr->wbuf_len); + fptr->wbuf_off = 0; + } + MEMMOVE(fptr->wbuf+fptr->wbuf_off+fptr->wbuf_len, RSTRING(str)->ptr+offset, char, len); + fptr->wbuf_len += len; + return len; +} + +long +rb_io_fwrite(ptr, len, f) + const char *ptr; + long len; + FILE *f; +{ + OpenFile of; + + of.fd = fileno(f); + of.stdio_file = f; + of.mode = FMODE_WRITABLE; + of.path = NULL; + return io_fwrite(rb_str_new(ptr, len), &of); +} + +/* + * call-seq: + * ios.write(string) => integer + * + * Writes the given string to <em>ios</em>. The stream must be opened + * for writing. If the argument is not a string, it will be converted + * to a string using <code>to_s</code>. Returns the number of bytes + * written. + * + * count = $stdout.write( "This is a test\n" ) + * puts "That was #{count} bytes of data" + * + * <em>produces:</em> + * + * This is a test + * That was 15 bytes of data + */ + +static VALUE +io_write(io, str) + VALUE io, str; +{ + OpenFile *fptr; + long n; + VALUE tmp; + + rb_secure(4); + str = rb_obj_as_string(str); + tmp = rb_io_check_io(io); + if (NIL_P(tmp)) { + /* port is not IO, call write method for it. */ + return rb_funcall(io, id_write, 1, str); + } + io = tmp; + if (RSTRING(str)->len == 0) return INT2FIX(0); + + GetOpenFile(io, fptr); + rb_io_check_writable(fptr); + + n = io_fwrite(str, fptr); + if (n == -1L) rb_sys_fail(fptr->path); + + return LONG2FIX(n); +} + +VALUE +rb_io_write(io, str) + VALUE io, str; +{ + return rb_funcall(io, id_write, 1, str); +} + +/* + * call-seq: + * ios << obj => ios + * + * String Output---Writes <i>obj</i> to <em>ios</em>. + * <i>obj</i> will be converted to a string using + * <code>to_s</code>. 
+ * + * $stdout << "Hello " << "world!\n" + * + * <em>produces:</em> + * + * Hello world! + */ + + +VALUE +rb_io_addstr(io, str) + VALUE io, str; +{ + rb_io_write(io, str); + return io; +} + +/* + * call-seq: + * ios.flush => ios + * + * Flushes any buffered data within <em>ios</em> to the underlying + * operating system (note that this is Ruby internal buffering only; + * the OS may buffer the data as well). + * + * $stdout.print "no newline" + * $stdout.flush + * + * <em>produces:</em> + * + * no newline + */ + +VALUE +rb_io_flush(io) + VALUE io; +{ + OpenFile *fptr; + + if (TYPE(io) != T_FILE) { + return rb_funcall(io, id_flush, 0); + } + + GetOpenFile(io, fptr); + + if (fptr->mode & FMODE_WRITABLE) { + io_fflush(fptr); + } + if (fptr->mode & FMODE_READABLE) { + io_unread(fptr); + } + + return io; +} + +/* + * call-seq: + * ios.pos => integer + * ios.tell => integer + * + * Returns the current offset (in bytes) of <em>ios</em>. + * + * f = File.new("testfile") + * f.pos #=> 0 + * f.gets #=> "This is line one\n" + * f.pos #=> 17 + */ + +static VALUE +rb_io_tell(io) + VALUE io; +{ + OpenFile *fptr; + off_t pos; + + GetOpenFile(io, fptr); + pos = io_tell(fptr); + if (pos < 0) rb_sys_fail(fptr->path); + return OFFT2NUM(pos); +} + +static VALUE +rb_io_seek(io, offset, whence) + VALUE io, offset; + int whence; +{ + OpenFile *fptr; + off_t pos; + + pos = NUM2OFFT(offset); + GetOpenFile(io, fptr); + pos = io_seek(fptr, pos, whence); + if (pos < 0) rb_sys_fail(fptr->path); + + return INT2FIX(0); +} + +/* + * call-seq: + * ios.seek(amount, whence=SEEK_SET) -> 0 + * + * Seeks to a given offset <i>anInteger</i> in the stream according to + * the value of <i>whence</i>: + * + * IO::SEEK_CUR | Seeks to _amount_ plus current position + * --------------+---------------------------------------------------- + * IO::SEEK_END | Seeks to _amount_ plus end of stream (you probably + * | want a negative value for _amount_) + * 
--------------+---------------------------------------------------- + * IO::SEEK_SET | Seeks to the absolute location given by _amount_ + * + * Example: + * + * f = File.new("testfile") + * f.seek(-13, IO::SEEK_END) #=> 0 + * f.readline #=> "And so on...\n" + */ + +static VALUE +rb_io_seek_m(argc, argv, io) + int argc; + VALUE *argv; + VALUE io; +{ + VALUE offset, ptrname; + int whence = SEEK_SET; + + if (rb_scan_args(argc, argv, "11", &offset, &ptrname) == 2) { + whence = NUM2INT(ptrname); + } + + return rb_io_seek(io, offset, whence); +} + +/* + * call-seq: + * ios.pos = integer => integer + * + * Seeks to the given position (in bytes) in <em>ios</em>. + * + * f = File.new("testfile") + * f.pos = 17 + * f.gets #=> "This is line two\n" + */ + +static VALUE +rb_io_set_pos(io, offset) + VALUE io, offset; +{ + OpenFile *fptr; + off_t pos; + + pos = NUM2OFFT(offset); + GetOpenFile(io, fptr); + pos = io_seek(fptr, pos, SEEK_SET); + if (pos < 0) rb_sys_fail(fptr->path); + + return OFFT2NUM(pos); +} + +/* + * call-seq: + * ios.rewind => 0 + * + * Positions <em>ios</em> to the beginning of input, resetting + * <code>lineno</code> to zero. 
+ * + * f = File.new("testfile") + * f.readline #=> "This is line one\n" + * f.rewind #=> 0 + * f.lineno #=> 0 + * f.readline #=> "This is line one\n" + */ + +static VALUE +rb_io_rewind(io) + VALUE io; +{ + OpenFile *fptr; + + GetOpenFile(io, fptr); + if (io_seek(fptr, 0L, 0) < 0) rb_sys_fail(fptr->path); + if (io == current_file) { + gets_lineno -= fptr->lineno; + } + fptr->lineno = 0; + + return INT2FIX(0); +} + +static int +io_getc(OpenFile *fptr) +{ + int r; + if (fptr->fd == 0 && (fptr->mode & FMODE_TTY) && TYPE(rb_stdout) == T_FILE) { + OpenFile *ofp; + GetOpenFile(rb_stdout, ofp); + if (ofp->mode & FMODE_TTY) { + rb_io_flush(rb_stdout); + } + } + if (fptr->rbuf == NULL) { + fptr->rbuf_off = 0; + fptr->rbuf_len = 0; + fptr->rbuf_capa = 8192; + fptr->rbuf = ALLOC_N(char, fptr->rbuf_capa); + } + if (fptr->rbuf_len == 0) { + retry: + TRAP_BEG; + r = read(fptr->fd, fptr->rbuf, fptr->rbuf_capa); + TRAP_END; /* xxx: signal handler may modify rbuf */ + if (r < 0) { + if (rb_io_wait_readable(fptr->fd)) + goto retry; + rb_sys_fail(fptr->path); + } + fptr->rbuf_off = 0; + fptr->rbuf_len = r; + if (r == 0) + return -1; /* EOF */ + } + fptr->rbuf_off++; + fptr->rbuf_len--; + return (unsigned char)fptr->rbuf[fptr->rbuf_off-1]; +} + +/* + * call-seq: + * ios.eof => true or false + * ios.eof? => true or false + * + * Returns true if <em>ios</em> is at end of file. The stream must be + * opened for reading or an <code>IOError</code> will be raised. + * + * f = File.new("testfile") + * dummy = f.readlines + * f.eof #=> true + */ + +VALUE +rb_io_eof(io) + VALUE io; +{ + OpenFile *fptr; + int ch; + + GetOpenFile(io, fptr); + rb_io_check_readable(fptr); + + if (READ_DATA_PENDING(fptr)) return Qfalse; + READ_CHECK(fptr); + ch = io_getc(fptr); + + if (ch != EOF) { + io_ungetc(ch, fptr); + return Qfalse; + } + return Qtrue; +} + +/* + * call-seq: + * ios.sync => true or false + * + * Returns the current ``sync mode'' of <em>ios</em>. 
When sync mode is + * true, all output is immediately flushed to the underlying operating + * system and is not buffered by Ruby internally. See also + * <code>IO#fsync</code>. + * + * f = File.new("testfile") + * f.sync #=> false + */ + +static VALUE +rb_io_sync(io) + VALUE io; +{ + OpenFile *fptr; + + GetOpenFile(io, fptr); + return (fptr->mode & FMODE_SYNC) ? Qtrue : Qfalse; +} + +/* + * call-seq: + * ios.sync = boolean => boolean + * + * Sets the ``sync mode'' to <code>true</code> or <code>false</code>. + * When sync mode is true, all output is immediately flushed to the + * underlying operating system and is not buffered internally. Returns + * the new state. See also <code>IO#fsync</code>. + * + * f = File.new("testfile") + * f.sync = true + * + * <em>(produces no output)</em> + */ + +static VALUE +rb_io_set_sync(io, mode) + VALUE io, mode; +{ + OpenFile *fptr; + + GetOpenFile(io, fptr); + if (RTEST(mode)) { + fptr->mode |= FMODE_SYNC; + } + else { + fptr->mode &= ~FMODE_SYNC; + } + return mode; +} + +/* + * call-seq: + * ios.fsync => 0 or nil + * + * Immediately writes all buffered data in <em>ios</em> to disk. + * Returns <code>nil</code> if the underlying operating system does not + * support <em>fsync(2)</em>. Note that <code>fsync</code> differs from + * using <code>IO#sync=</code>. The latter ensures that data is flushed + * from Ruby's buffers, but does not guarantee that the underlying + * operating system actually writes it to disk. + */ + +static VALUE +rb_io_fsync(io) + VALUE io; +{ +#ifdef HAVE_FSYNC + OpenFile *fptr; + + GetOpenFile(io, fptr); + + io_fflush(fptr); + if (fsync(fptr->fd) < 0) + rb_sys_fail(fptr->path); + return INT2FIX(0); +#else + rb_notimplement(); + return Qnil; /* not reached */ +#endif +} + +/* + * call-seq: + * ios.fileno => fixnum + * ios.to_i => fixnum + * + * Returns an integer representing the numeric file descriptor for + * <em>ios</em>. 
+ * + * $stdin.fileno #=> 0 + * $stdout.fileno #=> 1 + */ + +static VALUE +rb_io_fileno(io) + VALUE io; +{ + OpenFile *fptr; + int fd; + + GetOpenFile(io, fptr); + fd = fptr->fd; + return INT2FIX(fd); +} + + +/* + * call-seq: + * ios.pid => fixnum + * + * Returns the process ID of a child process associated with + * <em>ios</em>. This will be set by <code>IO::popen</code>. + * + * pipe = IO.popen("-") + * if pipe + * $stderr.puts "In parent, child pid is #{pipe.pid}" + * else + * $stderr.puts "In child, pid is #{$$}" + * end + * + * <em>produces:</em> + * + * In child, pid is 26209 + * In parent, child pid is 26209 + */ + +static VALUE +rb_io_pid(io) + VALUE io; +{ + OpenFile *fptr; + + GetOpenFile(io, fptr); + if (!fptr->pid) + return Qnil; + return INT2FIX(fptr->pid); +} + + +/* + * call-seq: + * ios.inspect => string + * + * Return a string describing this IO object. + */ + +static VALUE +rb_io_inspect(obj) + VALUE obj; +{ + OpenFile *fptr; + char *buf, *cname, *st = ""; + long len; + + fptr = RFILE(rb_io_taint_check(obj))->fptr; + if (!fptr || !fptr->path) return rb_any_to_s(obj); + cname = rb_obj_classname(obj); + len = strlen(cname) + strlen(fptr->path) + 5; + if (fptr->fd < 0) { + st = " (closed)"; + len += 9; + } + buf = ALLOCA_N(char, len); + sprintf(buf, "#<%s:%s%s>", cname, fptr->path, st); + return rb_str_new2(buf); +} + +/* + * call-seq: + * ios.to_io -> ios + * + * Returns <em>ios</em>. 
+ */ + +static VALUE +rb_io_to_io(io) + VALUE io; +{ + return io; +} + +/* reading functions */ +static long +read_buffered_data(char *ptr, long len, OpenFile *fptr) +{ + long n; + + n = READ_DATA_PENDING_COUNT(fptr); + if (n <= 0) return 0; + if (n > len) n = len; + MEMMOVE(ptr, fptr->rbuf+fptr->rbuf_off, char, n); + fptr->rbuf_off += n; + fptr->rbuf_len -= n; + return n; +} + +static long +io_fread(str, offset, fptr) + VALUE str; + long offset; + OpenFile *fptr; +{ + long len = RSTRING(str)->len - offset; + long n = len; + int c; + + while (n > 0) { + c = read_buffered_data(RSTRING(str)->ptr+offset, n, fptr); + if (c > 0) { + offset += c; + if ((n -= c) <= 0) break; + } + rb_thread_wait_fd(fptr->fd); + rb_io_check_closed(fptr); + c = io_getc(fptr); + if (c < 0) { + break; + } + RSTRING(str)->ptr[offset++] = c; + if (offset > RSTRING(str)->len) break; + n--; + } + return len - n; +} + +long +rb_io_fread(ptr, len, f) + char *ptr; + long len; + FILE *f; +{ + OpenFile of; + VALUE str; + long n; + + of.fd = fileno(f); + of.stdio_file = f; + of.mode = FMODE_READABLE; + str = rb_str_new(ptr, len); + n = io_fread(str, 0, &of); + MEMCPY(ptr, RSTRING(str)->ptr, char, n); + return n; +} + +#ifndef S_ISREG +# define S_ISREG(m) ((m & S_IFMT) == S_IFREG) +#endif + +#define SMALLBUF 100 + +static long +remain_size(fptr) + OpenFile *fptr; +{ + struct stat st; + off_t siz = READ_DATA_PENDING_COUNT(fptr); + off_t pos; + + if (fstat(fptr->fd, &st) == 0 && S_ISREG(st.st_mode) +#ifdef __BEOS__ + && (st.st_dev > 3) +#endif + ) + { + io_fflush(fptr); + pos = lseek(fptr->fd, 0, SEEK_CUR); + if (st.st_size >= pos && pos >= 0) { + siz += st.st_size - pos + 1; + if (siz > LONG_MAX) { + rb_raise(rb_eIOError, "file too big for single read"); + } + } + } + else { + siz += BUFSIZ; + } + return (long)siz; +} + +static VALUE +read_all(fptr, siz, str) + OpenFile *fptr; + long siz; + VALUE str; +{ + long bytes = 0; + long n; + + if (siz == 0) siz = BUFSIZ; + if (NIL_P(str)) { + str = rb_str_new(0, 
siz); + } + else { + rb_str_resize(str, siz); + } + for (;;) { + READ_CHECK(fptr); + n = io_fread(str, bytes, fptr); + if (n == 0 && bytes == 0) { + break; + } + bytes += n; + if (bytes < siz) break; + siz += BUFSIZ; + rb_str_resize(str, siz); + } + if (bytes != siz) rb_str_resize(str, bytes); + OBJ_TAINT(str); + + return str; +} + +static VALUE +io_getpartial(int argc, VALUE *argv, VALUE io) +{ + OpenFile *fptr; + VALUE length, str; + long n, len; + + rb_scan_args(argc, argv, "11", &length, &str); + + if ((len = NUM2LONG(length)) < 0) { + rb_raise(rb_eArgError, "negative length %ld given", len); + } + + if (NIL_P(str)) { + str = rb_str_new(0, len); + } + else { + StringValue(str); + rb_str_modify(str); + rb_str_resize(str, len); + } + OBJ_TAINT(str); + + GetOpenFile(io, fptr); + rb_io_check_readable(fptr); + + if (len == 0) + return str; + + READ_CHECK(fptr); + if (RSTRING(str)->len != len) { + modified: + rb_raise(rb_eRuntimeError, "buffer string modified"); + } + n = read_buffered_data(RSTRING(str)->ptr, len, fptr); + if (n <= 0) { + again: + if (RSTRING(str)->len != len) goto modified; + TRAP_BEG; + n = read(fptr->fd, RSTRING(str)->ptr, len); + TRAP_END; + if (n < 0) { + if (rb_io_wait_readable(fptr->fd)) + goto again; + rb_sys_fail(fptr->path); + } + } + rb_str_resize(str, n); + + if (n == 0) + return Qnil; + else + return str; +} + +/* + * call-seq: + * ios.readpartial(maxlen[, outbuf]) => string, outbuf + * + * Reads at most <i>maxlen</i> bytes from the I/O stream but + * it blocks only if <em>ios</em> has no data immediately available. + * If the optional <i>outbuf</i> argument is present, + * it must reference a String, which will receive the data. + * It raises <code>EOFError</code> on end of file. + * + * readpartial is designed for streams such as pipe, socket, tty, etc. + * It blocks only when no data is immediately available. + * This means that it blocks only when all of the following conditions hold. + * * the buffer in the IO object is empty. 
+ * * the content of the stream is empty. + * * the stream has not reached EOF. + * + * When readpartial blocks, it waits for data or EOF on the stream. + * If some data arrives, readpartial returns with the data. + * If EOF is reached, readpartial raises EOFError. + * + * When readpartial doesn't block, it returns or raises immediately. + * If the buffer is not empty, it returns the data in the buffer. + * Otherwise if the stream has some content, + * it returns the data in the stream. + * Otherwise if the stream has reached EOF, it raises EOFError. + * + * r, w = IO.pipe # buffer pipe content + * w << "abc" # "" "abc". + * r.readpartial(4096) #=> "abc" "" "" + * r.readpartial(4096) # blocks because buffer and pipe are empty. + * + * r, w = IO.pipe # buffer pipe content + * w << "abc" # "" "abc" + * w.close # "" "abc" EOF + * r.readpartial(4096) #=> "abc" "" EOF + * r.readpartial(4096) # raises EOFError + * + * r, w = IO.pipe # buffer pipe content + * w << "abc\ndef\n" # "" "abc\ndef\n" + * r.gets #=> "abc\n" "def\n" "" + * w << "ghi\n" # "def\n" "ghi\n" + * r.readpartial(4096) #=> "def\n" "" "ghi\n" + * r.readpartial(4096) #=> "ghi\n" "" "" + * + * Note that readpartial is nonblocking-flag insensitive. + * It blocks even if the nonblocking-flag is set. + * + * Also note that readpartial behaves similarly to sysread in blocking mode. + * The behavior is identical when the buffer is empty. + * + */ + +static VALUE +io_readpartial(argc, argv, io) + int argc; + VALUE *argv; + VALUE io; +{ + VALUE ret; + + ret = io_getpartial(argc, argv, io); + if (NIL_P(ret)) + rb_eof_error(); + else + return ret; +} + +/* + * call-seq: + * ios.read([length [, buffer]]) => string, buffer, or nil + * + * Reads at most <i>length</i> bytes from the I/O stream, or to the + * end of file if <i>length</i> is omitted or is <code>nil</code>. + * <i>length</i> must be a non-negative integer or nil. 
+ * If the optional <i>buffer</i> argument is present, it must reference + * a String, which will receive the data. + * + * At end of file, it returns <code>nil</code> or <code>""</code> + * depending on <i>length</i>. + * <code><i>ios</i>.read()</code> and + * <code><i>ios</i>.read(nil)</code> return <code>""</code>. + * <code><i>ios</i>.read(<i>positive-integer</i>)</code> returns nil. + * + * <code><i>ios</i>.read(0)</code> returns <code>""</code>. + * + * f = File.new("testfile") + * f.read(16) #=> "This is line one" + */ + +static VALUE +io_read(argc, argv, io) + int argc; + VALUE *argv; + VALUE io; +{ + OpenFile *fptr; + long n, len; + VALUE length, str; + + rb_scan_args(argc, argv, "02", &length, &str); + + if (NIL_P(length)) { + if (!NIL_P(str)) StringValue(str); + GetOpenFile(io, fptr); + rb_io_check_readable(fptr); + return read_all(fptr, remain_size(fptr), str); + } + len = NUM2LONG(length); + if (len < 0) { + rb_raise(rb_eArgError, "negative length %ld given", len); + } + + if (NIL_P(str)) { + str = rb_tainted_str_new(0, len); + } + else { + StringValue(str); + rb_str_modify(str); + rb_str_resize(str,len); + } + + GetOpenFile(io, fptr); + rb_io_check_readable(fptr); + if (len == 0) return str; + + READ_CHECK(fptr); + if (RSTRING(str)->len != len) { + rb_raise(rb_eRuntimeError, "buffer string modified"); + } + n = io_fread(str, 0, fptr); + if (n == 0) { + if (fptr->fd < 0) return Qnil; + rb_str_resize(str, 0); + return Qnil; + } + rb_str_resize(str, n); + RSTRING(str)->len = n; + RSTRING(str)->ptr[n] = '\0'; + OBJ_TAINT(str); + + return str; +} + +static int +appendline(fptr, delim, strp) + OpenFile *fptr; + int delim; + VALUE *strp; +{ + VALUE str = *strp; + int c = EOF; + + do { + long pending = READ_DATA_PENDING_COUNT(fptr); + if (pending > 0) { + const char *p = READ_DATA_PENDING_PTR(fptr); + const char *e = memchr(p, delim, pending); + long last = 0, len = (c != EOF); + if (e) pending = e - p + 1; + len += pending; + if (!NIL_P(str)) { + last = 
RSTRING(str)->len; + rb_str_resize(str, last + len); + } + else { + *strp = str = rb_str_buf_new(len); + RSTRING(str)->len = len; + RSTRING(str)->ptr[len] = '\0'; + } + if (c != EOF) { + RSTRING(str)->ptr[last++] = c; + } + read_buffered_data(RSTRING(str)->ptr + last, pending, fptr); /* must not fail */ + if (e) return delim; + } + else if (c != EOF) { + if (!NIL_P(str)) { + char ch = c; + rb_str_buf_cat(str, &ch, 1); + } + else { + *strp = str = rb_str_buf_new(1); + RSTRING(str)->ptr[RSTRING(str)->len++] = c; + } + } + rb_thread_wait_fd(fptr->fd); + rb_io_check_closed(fptr); + c = io_getc(fptr); + if (c < 0) { + return c; + } + } while (c != delim); + + { + char ch = c; + if (!NIL_P(str)) { + rb_str_cat(str, &ch, 1); + } + else { + *strp = str = rb_str_new(&ch, 1); + } + } + + return c; +} + +static inline int +swallow(fptr, term) + OpenFile *fptr; + int term; +{ + int c; + + do { + long cnt; + while ((cnt = READ_DATA_PENDING_COUNT(fptr)) > 0) { + char buf[1024]; + const char *p = READ_DATA_PENDING_PTR(fptr); + int i; + if (cnt > sizeof buf) cnt = sizeof buf; + if (*p != term) return Qtrue; + i = cnt; + while (--i && *++p == term); + if (!read_buffered_data(buf, cnt - i, fptr)) /* must not fail */ + rb_sys_fail(fptr->path); + } + rb_thread_wait_fd(fptr->fd); + rb_io_check_closed(fptr); + c = io_getc(fptr); + if (c != term) { + io_ungetc(c, fptr); + return Qtrue; + } + } while (c != EOF); + return Qfalse; +} + +static VALUE +rb_io_getline_fast(fptr, delim) + OpenFile *fptr; + unsigned char delim; +{ + VALUE str = Qnil; + int c; + + while ((c = appendline(fptr, delim, &str)) != EOF && c != delim); + + if (!NIL_P(str)) { + fptr->lineno++; + lineno = INT2FIX(fptr->lineno); + OBJ_TAINT(str); + } + + return str; +} + +static int +rscheck(rsptr, rslen, rs) + char *rsptr; + long rslen; + VALUE rs; +{ + if (RSTRING(rs)->ptr != rsptr && RSTRING(rs)->len != rslen) + rb_raise(rb_eRuntimeError, "rs modified"); + return 0; +} + +static VALUE +rb_io_getline(rs, io) + VALUE rs, 
io; +{ + VALUE str = Qnil; + OpenFile *fptr; + + GetOpenFile(io, fptr); + rb_io_check_readable(fptr); + if (NIL_P(rs)) { + str = read_all(fptr, 0, Qnil); + if (RSTRING(str)->len == 0) return Qnil; + } + else if (rs == rb_default_rs) { + return rb_io_getline_fast(fptr, '\n'); + } + else { + int c, newline; + char *rsptr; + long rslen; + int rspara = 0; + + rslen = RSTRING(rs)->len; + if (rslen == 0) { + rsptr = "\n\n"; + rslen = 2; + rspara = 1; + swallow(fptr, '\n'); + } + else if (rslen == 1) { + return rb_io_getline_fast(fptr, (unsigned char)RSTRING(rs)->ptr[0]); + } + else { + rsptr = RSTRING(rs)->ptr; + } + newline = rsptr[rslen - 1]; + + while ((c = appendline(fptr, newline, &str)) != EOF) { + if (c == newline) { + if (RSTRING(str)->len < rslen) continue; + if (!rspara) rscheck(rsptr, rslen, rs); + if (memcmp(RSTRING(str)->ptr + RSTRING(str)->len - rslen, + rsptr, rslen) == 0) break; + } + } + + if (rspara) { + if (c != EOF) { + swallow(fptr, '\n'); + } + } + } + + if (!NIL_P(str)) { + fptr->lineno++; + lineno = INT2FIX(fptr->lineno); + OBJ_TAINT(str); + } + + return str; +} + +VALUE +rb_io_gets(io) + VALUE io; +{ + OpenFile *fptr; + + GetOpenFile(io, fptr); + rb_io_check_readable(fptr); + return rb_io_getline_fast(fptr, '\n'); +} + +/* + * call-seq: + * ios.gets(sep_string=$/) => string or nil + * + * Reads the next ``line'' from the I/O stream; lines are separated by + * <i>sep_string</i>. A separator of <code>nil</code> reads the entire + * contents, and a zero-length separator reads the input a paragraph at + * a time (two successive newlines in the input separate paragraphs). + * The stream must be opened for reading or an <code>IOError</code> + * will be raised. The line read in will be returned and also assigned + * to <code>$_</code>. Returns <code>nil</code> if called at end of + * file. 
+ * + * File.new("testfile").gets #=> "This is line one\n" + * $_ #=> "This is line one\n" + */ + +static VALUE +rb_io_gets_m(argc, argv, io) + int argc; + VALUE *argv; + VALUE io; +{ + VALUE rs, str; + + if (argc == 0) { + rs = rb_rs; + } + else { + rb_scan_args(argc, argv, "1", &rs); + if (!NIL_P(rs)) StringValue(rs); + } + str = rb_io_getline(rs, io); + rb_lastline_set(str); + + return str; +} + +/* + * call-seq: + * ios.lineno => integer + * + * Returns the current line number in <em>ios</em>. The stream must be + * opened for reading. <code>lineno</code> counts the number of times + * <code>gets</code> is called, rather than the number of newlines + * encountered. The two values will differ if <code>gets</code> is + * called with a separator other than newline. See also the + * <code>$.</code> variable. + * + * f = File.new("testfile") + * f.lineno #=> 0 + * f.gets #=> "This is line one\n" + * f.lineno #=> 1 + * f.gets #=> "This is line two\n" + * f.lineno #=> 2 + */ + +static VALUE +rb_io_lineno(io) + VALUE io; +{ + OpenFile *fptr; + + GetOpenFile(io, fptr); + rb_io_check_readable(fptr); + return INT2NUM(fptr->lineno); +} + +/* + * call-seq: + * ios.lineno = integer => integer + * + * Manually sets the current line number to the given value. + * <code>$.</code> is updated only on the next read. + * + * f = File.new("testfile") + * f.gets #=> "This is line one\n" + * $. #=> 1 + * f.lineno = 1000 + * f.lineno #=> 1000 + * $. # lineno of last read #=> 1 + * f.gets #=> "This is line two\n" + * $. 
# lineno of last read #=> 1001 + */ + +static VALUE +rb_io_set_lineno(io, lineno) + VALUE io, lineno; +{ + OpenFile *fptr; + + GetOpenFile(io, fptr); + rb_io_check_readable(fptr); + fptr->lineno = NUM2INT(lineno); + return lineno; +} + +static void +lineno_setter(val, id, var) + VALUE val; + ID id; + VALUE *var; +{ + gets_lineno = NUM2INT(val); + *var = INT2FIX(gets_lineno); +} + +static VALUE +argf_set_lineno(argf, val) + VALUE argf, val; +{ + gets_lineno = NUM2INT(val); + lineno = INT2FIX(gets_lineno); + return Qnil; +} + +static VALUE +argf_lineno() +{ + return lineno; +} + +/* + * call-seq: + * ios.readline(sep_string=$/) => string + * + * Reads a line as with <code>IO#gets</code>, but raises an + * <code>EOFError</code> on end of file. + */ + +static VALUE +rb_io_readline(argc, argv, io) + int argc; + VALUE *argv; + VALUE io; +{ + VALUE line = rb_io_gets_m(argc, argv, io); + + if (NIL_P(line)) { + rb_eof_error(); + } + return line; +} + +/* + * call-seq: + * ios.readlines(sep_string=$/) => array + * + * Reads all of the lines in <em>ios</em>, and returns them in + * <i>anArray</i>. Lines are separated by the optional + * <i>sep_string</i>. If <i>sep_string</i> is <code>nil</code>, the + * rest of the stream is returned as a single record. + * The stream must be opened for reading or an + * <code>IOError</code> will be raised. 
+ * + * f = File.new("testfile") + * f.readlines[0] #=> "This is line one\n" + */ + +static VALUE +rb_io_readlines(argc, argv, io) + int argc; + VALUE *argv; + VALUE io; +{ + VALUE line, ary; + VALUE rs; + + if (argc == 0) { + rs = rb_rs; + } + else { + rb_scan_args(argc, argv, "1", &rs); + if (!NIL_P(rs)) StringValue(rs); + } + ary = rb_ary_new(); + while (!NIL_P(line = rb_io_getline(rs, io))) { + rb_ary_push(ary, line); + } + return ary; +} + +/* + * call-seq: + * ios.each(sep_string=$/) {|line| block } => ios + * ios.each_line(sep_string=$/) {|line| block } => ios + * + * Executes the block for every line in <em>ios</em>, where lines are + * separated by <i>sep_string</i>. <em>ios</em> must be opened for + * reading or an <code>IOError</code> will be raised. + * + * f = File.new("testfile") + * f.each {|line| puts "#{f.lineno}: #{line}" } + * + * <em>produces:</em> + * + * 1: This is line one + * 2: This is line two + * 3: This is line three + * 4: And so on... + */ + +static VALUE +rb_io_each_line(argc, argv, io) + int argc; + VALUE *argv; + VALUE io; +{ + VALUE str; + VALUE rs; + + if (argc == 0) { + rs = rb_rs; + } + else { + rb_scan_args(argc, argv, "1", &rs); + if (!NIL_P(rs)) StringValue(rs); + } + while (!NIL_P(str = rb_io_getline(rs, io))) { + rb_yield(str); + } + return io; +} + +/* + * call-seq: + * ios.each_byte {|byte| block } => ios + * + * Calls the given block once for each byte (0..255) in <em>ios</em>, + * passing the byte as an argument. The stream must be opened for + * reading or an <code>IOError</code> will be raised. 
+ * + * f = File.new("testfile") + * checksum = 0 + * f.each_byte {|x| checksum ^= x } #=> #<File:testfile> + * checksum #=> 12 + */ + +static VALUE +rb_io_each_byte(io) + VALUE io; +{ + OpenFile *fptr; + int c; + + GetOpenFile(io, fptr); + + for (;;) { + rb_io_check_readable(fptr); + READ_CHECK(fptr); + c = io_getc(fptr); + if (c < 0) { + break; + } + rb_yield(INT2FIX(c & 0xff)); + } + return io; +} + +/* + * call-seq: + * ios.getc => fixnum or nil + * + * Gets the next 8-bit byte (0..255) from <em>ios</em>. Returns + * <code>nil</code> if called at end of file. + * + * f = File.new("testfile") + * f.getc #=> 84 + * f.getc #=> 104 + */ + +VALUE +rb_io_getc(io) + VALUE io; +{ + OpenFile *fptr; + int c; + + GetOpenFile(io, fptr); + rb_io_check_readable(fptr); + + READ_CHECK(fptr); + c = io_getc(fptr); + + if (c < 0) { + return Qnil; + } + return INT2FIX(c & 0xff); +} + +int +rb_getc(f) + FILE *f; +{ + int c; + + if (!STDIO_READ_DATA_PENDING(f)) { + rb_thread_wait_fd(fileno(f)); + } + TRAP_BEG; + c = getc(f); + TRAP_END; + + return c; +} + +/* + * call-seq: + * ios.readchar => fixnum + * + * Reads a character as with <code>IO#getc</code>, but raises an + * <code>EOFError</code> on end of file. + */ + +static VALUE +rb_io_readchar(io) + VALUE io; +{ + VALUE c = rb_io_getc(io); + + if (NIL_P(c)) { + rb_eof_error(); + } + return c; +} + +/* + * call-seq: + * ios.ungetc(integer) => nil + * + * Pushes back one character (passed as a parameter) onto <em>ios</em>, + * such that a subsequent buffered read will return it. Only one character + * may be pushed back before a subsequent read operation (that is, + * you will be able to read only the last of several characters that have been pushed + * back). Has no effect with unbuffered reads (such as <code>IO#sysread</code>). 
+ * + * f = File.new("testfile") #=> #<File:testfile> + * c = f.getc #=> 84 + * f.ungetc(c) #=> nil + * f.getc #=> 84 + */ + +VALUE +rb_io_ungetc(io, c) + VALUE io, c; +{ + OpenFile *fptr; + int cc = NUM2INT(c); + + GetOpenFile(io, fptr); + rb_io_check_readable(fptr); + + if (io_ungetc(cc, fptr) == EOF && cc != EOF) { + rb_raise(rb_eIOError, "ungetc failed"); + } + return Qnil; +} + +/* + * call-seq: + * ios.isatty => true or false + * ios.tty? => true or false + * + * Returns <code>true</code> if <em>ios</em> is associated with a + * terminal device (tty), <code>false</code> otherwise. + * + * File.new("testfile").isatty #=> false + * File.new("/dev/tty").isatty #=> true + */ + +static VALUE +rb_io_isatty(io) + VALUE io; +{ + OpenFile *fptr; + + GetOpenFile(io, fptr); + if (isatty(fptr->fd) == 0) + return Qfalse; + return Qtrue; +} + +#define FMODE_PREP (1<<16) +#define IS_PREP_STDIO(f) ((f)->mode & FMODE_PREP) +#define PREP_STDIO_NAME(f) ((f)->path) + +static void +fptr_finalize(fptr, noraise) + OpenFile *fptr; + int noraise; +{ + if (fptr->wbuf_len) { + io_fflush(fptr); + } + if (IS_PREP_STDIO(fptr) || + fptr->fd <= 2) { + return; + } + if (fptr->stdio_file) { + if (fclose(fptr->stdio_file) < 0 && !noraise) { + /* fptr->stdio_file is deallocated anyway */ + fptr->stdio_file = 0; + fptr->fd = -1; + rb_sys_fail(fptr->path); + } + } + else if (0 <= fptr->fd) { + if (close(fptr->fd) < 0 && !noraise) { + /* fptr->fd is still not closed */ + rb_sys_fail(fptr->path); + } + } + fptr->fd = -1; + fptr->stdio_file = 0; + fptr->mode &= ~(FMODE_READABLE|FMODE_WRITABLE); +} + +static void +rb_io_fptr_cleanup(fptr, noraise) + OpenFile *fptr; + int noraise; +{ + if (fptr->finalize) { + (*fptr->finalize)(fptr, noraise); + } + else { + fptr_finalize(fptr, noraise); + } +} + +int +rb_io_fptr_finalize(fptr) + OpenFile *fptr; +{ + if (!fptr) return 0; + if (fptr->refcnt <= 0 || --fptr->refcnt) return 0; + if (fptr->path) { + free(fptr->path); + fptr->path = 0; + } + if (0 <= 
fptr->fd) + rb_io_fptr_cleanup(fptr, Qtrue); + if (fptr->rbuf) { + free(fptr->rbuf); + fptr->rbuf = 0; + } + if (fptr->wbuf) { + free(fptr->wbuf); + fptr->wbuf = 0; + } + free(fptr); + return 1; +} + +VALUE +rb_io_close(io) + VALUE io; +{ + OpenFile *fptr; + int fd; + + fptr = RFILE(io)->fptr; + if (!fptr) return Qnil; + if (fptr->fd < 0) return Qnil; + + fd = fptr->fd; + rb_io_fptr_cleanup(fptr, Qfalse); + rb_thread_fd_close(fd); + + if (fptr->pid) { + rb_syswait(fptr->pid); + fptr->pid = 0; + } + + return Qnil; +} + +/* + * call-seq: + * ios.close => nil + * + * Closes <em>ios</em> and flushes any pending writes to the operating + * system. The stream is unavailable for any further data operations; + * an <code>IOError</code> is raised if such an attempt is made. I/O + * streams are automatically closed when they are claimed by the + * garbage collector. + */ + +static VALUE +rb_io_close_m(io) + VALUE io; +{ + if (rb_safe_level() >= 4 && !OBJ_TAINTED(io)) { + rb_raise(rb_eSecurityError, "Insecure: can't close"); + } + rb_io_check_closed(RFILE(io)->fptr); + rb_io_close(io); + return Qnil; +} + +static VALUE +io_close(io) + VALUE io; +{ + return rb_funcall(io, rb_intern("close"), 0, 0); +} + +/* + * call-seq: + * ios.closed? => true or false + * + * Returns <code>true</code> if <em>ios</em> is completely closed (for + * duplex streams, both reader and writer), <code>false</code> + * otherwise. + * + * f = File.new("testfile") + * f.close #=> nil + * f.closed? #=> true + * f = IO.popen("/bin/sh","r+") + * f.close_write #=> nil + * f.closed? #=> false + * f.close_read #=> nil + * f.closed? #=> true + */ + + +static VALUE +rb_io_closed(io) + VALUE io; +{ + OpenFile *fptr; + + fptr = RFILE(io)->fptr; + rb_io_check_initialized(fptr); + return 0 <= fptr->fd ? Qfalse : Qtrue; +} + +/* + * call-seq: + * ios.close_read => nil + * + * Closes the read end of a duplex I/O stream (i.e., one that contains + * both a read and a write stream, such as a pipe). 
Will raise an + * <code>IOError</code> if the stream is not duplexed. + * + * f = IO.popen("/bin/sh","r+") + * f.close_read + * f.readlines + * + * <em>produces:</em> + * + * prog.rb:3:in `readlines': not opened for reading (IOError) + * from prog.rb:3 + */ + +static VALUE +rb_io_close_read(io) + VALUE io; +{ + OpenFile *fptr; + + if (rb_safe_level() >= 4 && !OBJ_TAINTED(io)) { + rb_raise(rb_eSecurityError, "Insecure: can't close"); + } + GetOpenFile(io, fptr); + if (is_socket(fptr->fd, fptr->path)) { +#ifndef SHUT_RD +# define SHUT_RD 0 +#endif + if (shutdown(fptr->fd, SHUT_RD) < 0) + rb_sys_fail(fptr->path); + fptr->mode &= ~FMODE_READABLE; + if (!(fptr->mode & FMODE_WRITABLE)) + return rb_io_close(io); + return Qnil; + } + if (fptr->mode & FMODE_WRITABLE) { + rb_raise(rb_eIOError, "closing non-duplex IO for reading"); + } + return rb_io_close(io); +} + +/* + * call-seq: + * ios.close_write => nil + * + * Closes the write end of a duplex I/O stream (i.e., one that contains + * both a read and a write stream, such as a pipe). Will raise an + * <code>IOError</code> if the stream is not duplexed. 
+ * + * f = IO.popen("/bin/sh","r+") + * f.close_write + * f.print "nowhere" + * + * <em>produces:</em> + * + * prog.rb:3:in `write': not opened for writing (IOError) + * from prog.rb:3:in `print' + * from prog.rb:3 + */ + +static VALUE +rb_io_close_write(io) + VALUE io; +{ + OpenFile *fptr; + + if (rb_safe_level() >= 4 && !OBJ_TAINTED(io)) { + rb_raise(rb_eSecurityError, "Insecure: can't close"); + } + GetOpenFile(io, fptr); + if (is_socket(fptr->fd, fptr->path)) { +#ifndef SHUT_WR +# define SHUT_WR 1 +#endif + if (shutdown(fptr->fd, SHUT_WR) < 0) + rb_sys_fail(fptr->path); + fptr->mode &= ~FMODE_WRITABLE; + if (!(fptr->mode & FMODE_READABLE)) + return rb_io_close(io); + return Qnil; + } + + if (fptr->mode & FMODE_READABLE) { + rb_raise(rb_eIOError, "closing non-duplex IO for writing"); + } + return rb_io_close(io); +} + +/* + * call-seq: + * ios.sysseek(offset, whence=SEEK_SET) => integer + * + * Seeks to a given <i>offset</i> in the stream according to the value + * of <i>whence</i> (see <code>IO#seek</code> for values of + * <i>whence</i>). Returns the new offset into the file. + * + * f = File.new("testfile") + * f.sysseek(-13, IO::SEEK_END) #=> 53 + * f.sysread(10) #=> "And so on." + */ + +static VALUE +rb_io_sysseek(argc, argv, io) + int argc; + VALUE *argv; + VALUE io; +{ + VALUE offset, ptrname; + int whence = SEEK_SET; + OpenFile *fptr; + off_t pos; + + if (rb_scan_args(argc, argv, "11", &offset, &ptrname) == 2) { + whence = NUM2INT(ptrname); + } + pos = NUM2OFFT(offset); + GetOpenFile(io, fptr); + if ((fptr->mode & FMODE_READABLE) && READ_DATA_BUFFERED(fptr)) { + rb_raise(rb_eIOError, "sysseek for buffered IO"); + } + if ((fptr->mode & FMODE_WRITABLE) && fptr->wbuf_len) { + rb_warn("sysseek for buffered IO"); + } + pos = lseek(fptr->fd, pos, whence); + if (pos == -1) rb_sys_fail(fptr->path); + + return OFFT2NUM(pos); +} + +/* + * call-seq: + * ios.syswrite(string) => integer + * + * Writes the given string to <em>ios</em> using a low-level write. 
+ * Returns the number of bytes written. Do not mix with other methods + * that write to <em>ios</em> or you may get unpredictable results. + * Raises <code>SystemCallError</code> on error. + * + * f = File.new("out", "w") + * f.syswrite("ABCDEF") #=> 6 + */ + +static VALUE +rb_io_syswrite(io, str) + VALUE io, str; +{ + OpenFile *fptr; + long n; + + rb_secure(4); + if (TYPE(str) != T_STRING) + str = rb_obj_as_string(str); + + GetOpenFile(io, fptr); + rb_io_check_writable(fptr); + + if (fptr->wbuf_len) { + rb_warn("syswrite for buffered IO"); + } + if (!rb_thread_fd_writable(fptr->fd)) { + rb_io_check_closed(fptr); + } + n = write(fptr->fd, RSTRING(str)->ptr, RSTRING(str)->len); + + if (n == -1) rb_sys_fail(fptr->path); + + return LONG2FIX(n); +} + +/* + * call-seq: + * ios.sysread(integer[, outbuf]) => string + * + * Reads <i>integer</i> bytes from <em>ios</em> using a low-level + * read and returns them as a string. Do not mix with other methods + * that read from <em>ios</em> or you may get unpredictable results. + * If the optional <i>outbuf</i> argument is present, it must reference + * a String, which will receive the data. + * Raises <code>SystemCallError</code> on error and + * <code>EOFError</code> at end of file. 
+ * + * f = File.new("testfile") + * f.sysread(16) #=> "This is line one" + */ + +static VALUE +rb_io_sysread(argc, argv, io) + int argc; + VALUE *argv; + VALUE io; +{ + VALUE len, str; + OpenFile *fptr; + long n, ilen; + + rb_scan_args(argc, argv, "11", &len, &str); + ilen = NUM2LONG(len); + + if (NIL_P(str)) { + str = rb_str_new(0, ilen); + } + else { + StringValue(str); + rb_str_modify(str); + rb_str_resize(str, ilen); + } + if (ilen == 0) return str; + + GetOpenFile(io, fptr); + rb_io_check_readable(fptr); + + if (READ_DATA_BUFFERED(fptr)) { + rb_raise(rb_eIOError, "sysread for buffered IO"); + } + + n = fptr->fd; + rb_thread_wait_fd(fptr->fd); + rb_io_check_closed(fptr); + if (RSTRING(str)->len != ilen) { + rb_raise(rb_eRuntimeError, "buffer string modified"); + } + TRAP_BEG; + n = read(fptr->fd, RSTRING(str)->ptr, ilen); + TRAP_END; + + if (n == -1) { + rb_sys_fail(fptr->path); + } + rb_str_resize(str, n); + if (n == 0 && ilen > 0) { + rb_eof_error(); + } + RSTRING(str)->len = n; + RSTRING(str)->ptr[n] = '\0'; + OBJ_TAINT(str); + + return str; +} + +/* + * call-seq: + * ios.binmode => ios + * + * Puts <em>ios</em> into binary mode. This is useful only in + * MS-DOS/Windows environments. Once a stream is in binary mode, it + * cannot be reset to nonbinary mode. + */ + +VALUE +rb_io_binmode(io) + VALUE io; +{ +#if defined(_WIN32) || defined(DJGPP) || defined(__CYGWIN__) || defined(__human68k__) || defined(__EMX__) + OpenFile *fptr; + + GetOpenFile(io, fptr); + if (!(fptr->mode & FMODE_BINMODE) && READ_DATA_BUFFERED(fptr)) { + rb_raise(rb_eIOError, "buffer already filled with text-mode content"); + } + if (0 <= fptr->fd && setmode(fptr->fd, O_BINARY) == -1) + rb_sys_fail(fptr->path); + + fptr->mode |= FMODE_BINMODE; +#endif + return io; +} + +static char* +rb_io_flags_mode(flags) + int flags; +{ +#ifdef O_BINARY +# define MODE_BINMODE(a,b) ((flags & FMODE_BINMODE) ? 
(b) : (a)) +#else +# define MODE_BINMODE(a,b) (a) +#endif + if (flags & FMODE_APPEND) { + if ((flags & FMODE_READWRITE) == FMODE_READWRITE) { + return MODE_BINMODE("a+", "ab+"); + } + return MODE_BINMODE("a", "ab"); + } + switch (flags & FMODE_READWRITE) { + case FMODE_READABLE: + return MODE_BINMODE("r", "rb"); + case FMODE_WRITABLE: + return MODE_BINMODE("w", "wb"); + case FMODE_READWRITE: + if (flags & FMODE_CREATE) { + return MODE_BINMODE("w+", "wb+"); + } + return MODE_BINMODE("r+", "rb+"); + } + rb_raise(rb_eArgError, "illegal access modenum %o", flags); + return NULL; /* not reached */ +} + +int +rb_io_mode_flags(mode) + const char *mode; +{ + int flags = 0; + const char *m = mode; + + switch (*m++) { + case 'r': + flags |= FMODE_READABLE; + break; + case 'w': + flags |= FMODE_WRITABLE | FMODE_CREATE; + break; + case 'a': + flags |= FMODE_WRITABLE | FMODE_APPEND | FMODE_CREATE; + break; + default: + error: + rb_raise(rb_eArgError, "illegal access mode %s", mode); + } + + while (*m) { + switch (*m++) { + case 'b': + flags |= FMODE_BINMODE; + break; + case '+': + flags |= FMODE_READWRITE; + break; + default: + goto error; + } + } + + return flags; +} + +int +rb_io_modenum_flags(mode) + int mode; +{ + int flags = 0; + + switch (mode & (O_RDONLY|O_WRONLY|O_RDWR)) { + case O_RDONLY: + flags = FMODE_READABLE; + break; + case O_WRONLY: + flags = FMODE_WRITABLE; + break; + case O_RDWR: + flags = FMODE_READWRITE; + break; + } + + if (mode & O_APPEND) { + flags |= FMODE_APPEND; + } + if (mode & O_CREAT) { + flags |= FMODE_CREATE; + } +#ifdef O_BINARY + if (mode & O_BINARY) { + flags |= FMODE_BINMODE; + } +#endif + + return flags; +} + +static int +rb_io_mode_modenum(mode) + const char *mode; +{ + int flags = 0; + const char *m = mode; + + switch (*m++) { + case 'r': + flags |= O_RDONLY; + break; + case 'w': + flags |= O_WRONLY | O_CREAT | O_TRUNC; + break; + case 'a': + flags |= O_WRONLY | O_CREAT | O_APPEND; + break; + default: + error: + rb_raise(rb_eArgError, 
"illegal access mode %s", mode); + } + + while (*m) { + switch (*m++) { + case 'b': +#ifdef O_BINARY + flags |= O_BINARY; +#endif + break; + case '+': + flags = (flags & ~O_ACCMODE) | O_RDWR; + break; + default: + goto error; + } + } + + return flags; +} + +#define MODENUM_MAX 4 + +static char* +rb_io_modenum_mode(flags) + int flags; +{ +#ifdef O_BINARY +# define MODE_BINARY(a,b) ((flags & O_BINARY) ? (b) : (a)) +#else +# define MODE_BINARY(a,b) (a) +#endif + if (flags & O_APPEND) { + if ((flags & O_RDWR) == O_RDWR) { + return MODE_BINARY("a+", "ab+"); + } + return MODE_BINARY("a", "ab"); + } + switch (flags & (O_RDONLY|O_WRONLY|O_RDWR)) { + case O_RDONLY: + return MODE_BINARY("r", "rb"); + case O_WRONLY: + return MODE_BINARY("w", "wb"); + case O_RDWR: + return MODE_BINARY("r+", "rb+"); + } + rb_raise(rb_eArgError, "illegal access modenum %o", flags); + return NULL; /* not reached */ +} + +static int +rb_sysopen(fname, flags, mode) + char *fname; + int flags; + unsigned int mode; +{ + int fd; + + fd = open(fname, flags, mode); + if (fd < 0) { + if (errno == EMFILE || errno == ENFILE) { + rb_gc(); + fd = open(fname, flags, mode); + } + if (fd < 0) { + rb_sys_fail(fname); + } + } + return fd; +} + +FILE * +rb_fopen(fname, mode) + const char *fname; + const char *mode; +{ + FILE *file; + + file = fopen(fname, mode); + if (!file) { + if (errno == EMFILE || errno == ENFILE) { + rb_gc(); + file = fopen(fname, mode); + } + if (!file) { + rb_sys_fail(fname); + } + } +#ifdef USE_SETVBUF + if (setvbuf(file, NULL, _IOFBF, 0) != 0) + rb_warn("setvbuf() can't be honoured for %s", fname); +#endif +#ifdef __human68k__ + setmode(fileno(file), O_TEXT); +#endif + return file; +} + +FILE * +rb_fdopen(fd, mode) + int fd; + const char *mode; +{ + FILE *file; + +#if defined(sun) + errno = 0; +#endif + file = fdopen(fd, mode); + if (!file) { +#if defined(sun) + if (errno == 0 || errno == EMFILE || errno == ENFILE) { +#else + if (errno == EMFILE || errno == ENFILE) { +#endif + rb_gc(); 
+#if defined(sun) + errno = 0; +#endif + file = fdopen(fd, mode); + } + if (!file) { +#ifdef _WIN32 + if (errno == 0) errno = EINVAL; +#elif defined(sun) + if (errno == 0) errno = EMFILE; +#endif + rb_sys_fail(0); + } + } + + /* xxx: should be _IONBF? A buffer in FILE may have trouble. */ +#ifdef USE_SETVBUF + if (setvbuf(file, NULL, _IOFBF, 0) != 0) + rb_warn("setvbuf() can't be honoured (fd=%d)", fd); +#endif + return file; +} + +static void +io_check_tty(OpenFile *fptr) +{ + if (isatty(fptr->fd)) + fptr->mode |= FMODE_TTY|FMODE_DUPLEX; +} + +static VALUE +rb_file_open_internal(io, fname, mode) + VALUE io; + const char *fname, *mode; +{ + OpenFile *fptr; + + MakeOpenFile(io, fptr); + fptr->mode = rb_io_mode_flags(mode); + fptr->path = strdup(fname); + fptr->fd = rb_sysopen(fptr->path, rb_io_mode_modenum(rb_io_flags_mode(fptr->mode)), 0666); + io_check_tty(fptr); + + return io; +} + +VALUE +rb_file_open(fname, mode) + const char *fname, *mode; +{ + return rb_file_open_internal(io_alloc(rb_cFile), fname, mode); +} + +static VALUE +rb_file_sysopen_internal(io, fname, flags, mode) + VALUE io; + char *fname; + int flags, mode; +{ + OpenFile *fptr; + + MakeOpenFile(io, fptr); + + fptr->path = strdup(fname); + fptr->mode = rb_io_modenum_flags(flags); + fptr->fd = rb_sysopen(fptr->path, flags, mode); + io_check_tty(fptr); + + return io; +} + +VALUE +rb_file_sysopen(fname, flags, mode) + const char *fname; + int flags, mode; +{ + return rb_file_sysopen_internal(io_alloc(rb_cFile), fname, flags, mode); +} + +#if defined(__CYGWIN__) || !defined(HAVE_FORK) +static struct pipe_list { + OpenFile *fptr; + struct pipe_list *next; +} *pipe_list; + +static void +pipe_add_fptr(fptr) + OpenFile *fptr; +{ + struct pipe_list *list; + + list = ALLOC(struct pipe_list); + list->fptr = fptr; + list->next = pipe_list; + pipe_list = list; +} + +static void +pipe_del_fptr(fptr) + OpenFile *fptr; +{ + struct pipe_list *list = pipe_list; + struct pipe_list *tmp; + + if (list->fptr == fptr) { + 
pipe_list = list->next; + free(list); + return; + } + + while (list->next) { + if (list->next->fptr == fptr) { + tmp = list->next; + list->next = list->next->next; + free(tmp); + return; + } + list = list->next; + } +} + +static void +pipe_atexit _((void)) +{ + struct pipe_list *list = pipe_list; + struct pipe_list *tmp; + + while (list) { + tmp = list->next; + rb_io_fptr_finalize(list->fptr); + list = tmp; + } +} + +static void pipe_finalize _((OpenFile *fptr,int)); + +static void +pipe_finalize(fptr, noraise) + OpenFile *fptr; + int noraise; +{ +#if !defined(HAVE_FORK) && !defined(_WIN32) + extern VALUE rb_last_status; + int status; + if (fptr->stdio_file) { + status = pclose(fptr->stdio_file); + } + fptr->fd = -1; + fptr->stdio_file = 0; +#if defined DJGPP + status <<= 8; +#endif + rb_last_status = INT2FIX(status); +#else + fptr_finalize(fptr, noraise); +#endif + pipe_del_fptr(fptr); +} +#endif + +void +rb_io_synchronized(fptr) + OpenFile *fptr; +{ + fptr->mode |= FMODE_SYNC; +} + +void +rb_io_unbuffered(fptr) + OpenFile *fptr; +{ + rb_io_synchronized(fptr); +} + +struct popen_arg { + struct rb_exec_arg exec; + int modef; + int pair[2]; +}; + +static void +popen_redirect(p) + struct popen_arg *p; +{ + if ((p->modef & FMODE_READABLE) && (p->modef & FMODE_WRITABLE)) { + close(p->pair[0]); + dup2(p->pair[1], 0); + dup2(p->pair[1], 1); + if (2 <= p->pair[1]) + close(p->pair[1]); + } + else if (p->modef & FMODE_READABLE) { + close(p->pair[0]); + if (p->pair[1] != 1) { + dup2(p->pair[1], 1); + close(p->pair[1]); + } + } + else { + close(p->pair[1]); + if (p->pair[0] != 0) { + dup2(p->pair[0], 0); + close(p->pair[0]); + } + } +} + +#ifdef HAVE_FORK +static int +popen_exec(p) + struct popen_arg *p; +{ + int fd; + + popen_redirect(p); + for (fd = 3; fd < NOFILE; fd++) { +#ifdef FD_CLOEXEC + fcntl(fd, F_SETFL, FD_CLOEXEC); +#else + close(fd); +#endif + } + return rb_exec(&p->exec); +} +#endif + +static VALUE +pipe_open(argc, argv, mode) + int argc; + VALUE *argv; + char 
*mode; +{ + int modef = rb_io_mode_flags(mode); + int pid = 0; + OpenFile *fptr; + VALUE port, prog; +#if defined(HAVE_FORK) + int status; + struct popen_arg arg; + volatile int doexec; +#elif defined(_WIN32) + int openmode = rb_io_mode_modenum(mode); + char *exename = NULL; +#endif + char *cmd; + FILE *fp = 0; + int fd = -1; + + prog = rb_check_argv(argc, argv); + if (!prog) { + if (argc == 1) argc = 0; + prog = argv[0]; + } + +#if defined(HAVE_FORK) + cmd = StringValueCStr(prog); + doexec = (strcmp("-", cmd) != 0); + if (!doexec) { + fflush(stdin); /* is it really needed? */ + rb_io_flush(rb_stdout); + rb_io_flush(rb_stderr); + } + arg.modef = modef; + arg.pair[0] = arg.pair[1] = -1; + if ((modef & FMODE_READABLE) && (modef & FMODE_WRITABLE)) { + if (socketpair(AF_UNIX, SOCK_STREAM, 0, arg.pair) < 0) + rb_sys_fail(cmd); + } + else if (modef & FMODE_READABLE) { + if (pipe(arg.pair) < 0) + rb_sys_fail(cmd); + } + else if (modef & FMODE_WRITABLE) { + if (pipe(arg.pair) < 0) + rb_sys_fail(cmd); + } + else { + rb_sys_fail(cmd); + } + if (doexec) { + arg.exec.argc = argc; + arg.exec.argv = argv; + arg.exec.prog = cmd; + pid = rb_fork(&status, popen_exec, &arg); + } + else { + pid = rb_fork(&status, 0, 0); + if (pid == 0) { /* child */ + popen_redirect(&arg); + rb_io_synchronized(RFILE(orig_stdout)->fptr); + rb_io_synchronized(RFILE(orig_stderr)->fptr); + return Qnil; + } + } + + /* parent */ + if (pid == -1) { + int e = errno; + close(arg.pair[0]); + close(arg.pair[1]); + errno = e; + rb_sys_fail(cmd); + } + if ((modef & FMODE_READABLE) && (modef & FMODE_WRITABLE)) { + close(arg.pair[1]); + fd = arg.pair[0]; + } + else if (modef & FMODE_READABLE) { + close(arg.pair[1]); + fd = arg.pair[0]; + } + else { + close(arg.pair[0]); + fd = arg.pair[1]; + } +#elif defined(_WIN32) + if (argc) { + char **args = ALLOCA_N(char *, argc+1); + int i; + + for (i = 0; i < argc; ++i) { + args[i] = RSTRING(argv[i])->ptr; + } + args[i] = NULL; + cmd = ALLOCA_N(char, rb_w32_argv_size(args)); 
+ rb_w32_join_argv(cmd, args); + exename = RSTRING(prog)->ptr; + } + else { + cmd = StringValueCStr(prog); + } + while ((pid = rb_w32_pipe_exec(cmd, exename, openmode, &fd)) == -1) { + /* exec failed */ + switch (errno) { + case EAGAIN: +#if defined(EWOULDBLOCK) && EWOULDBLOCK != EAGAIN + case EWOULDBLOCK: +#endif + rb_thread_sleep(1); + break; + default: + rb_sys_fail(RSTRING(prog)->ptr); + break; + } + } +#else + if (argc) + prog = rb_ary_join(rb_ary_new4(argc, argv), rb_str_new2(" ")); + fp = popen(StringValueCStr(prog), mode); + if (!fp) rb_sys_fail(RSTRING(prog)->ptr); + fd = fileno(fp); +#endif + + port = io_alloc(rb_cIO); + MakeOpenFile(port, fptr); + fptr->fd = fd; + fptr->stdio_file = fp; + fptr->mode = modef | FMODE_SYNC|FMODE_DUPLEX; + fptr->pid = pid; + +#if defined (__CYGWIN__) || !defined(HAVE_FORK) + fptr->finalize = pipe_finalize; + pipe_add_fptr(fptr); +#endif + return port; +} + +/* + * call-seq: + * IO.popen(cmd, mode="r") => io + * IO.popen(cmd, mode="r") {|io| block } => obj + * + * Runs the specified command as a subprocess; the subprocess's + * standard input and output will be connected to the returned + * <code>IO</code> object. If _cmd_ is a +String+ + * ``<code>-</code>'', then a new instance of Ruby is started as the + * subprocess. If <i>cmd</i> is an +Array+ of +String+, then it will + * be used as the subprocess's +argv+ bypassing a shell. The default + * mode for the new file object is ``r'', but <i>mode</i> may be set + * to any of the modes listed in the description for class IO. + * + * Raises exceptions which <code>IO::pipe</code> and + * <code>Kernel::system</code> raise. + * + * If a block is given, Ruby will run the command as a child connected + * to Ruby with a pipe. Ruby's end of the pipe will be passed as a + * parameter to the block. In this case <code>IO::popen</code> returns + * the value of the block. 
+ * + * If a block is given with a _cmd_ of ``<code>-</code>'', + * the block will be run in two separate processes: once in the parent, + * and once in a child. The parent process will be passed the pipe + * object as a parameter to the block, the child version of the block + * will be passed <code>nil</code>, and the child's standard in and + * standard out will be connected to the parent through the pipe. Not + * available on all platforms. + * + * f = IO.popen("uname") + * p f.readlines + * puts "Parent is #{Process.pid}" + * IO.popen("date") { |f| puts f.gets } + * IO.popen("-") {|f| $stderr.puts "#{Process.pid} is here, f is #{f}"} + * IO.popen(%w"sed -e s|^|<foo>| -e s&$&;zot;&", "r+") {|f| + * f.puts "bar"; f.close_write; puts f.gets + * } + * + * <em>produces:</em> + * + * ["Linux\n"] + * Parent is 26166 + * Wed Apr 9 08:53:52 CDT 2003 + * 26169 is here, f is + * 26166 is here, f is #<IO:0x401b3d44> + * <foo>bar;zot; + */ + +static VALUE +rb_io_s_popen(argc, argv, klass) + int argc; + VALUE *argv; + VALUE klass; +{ + char *mode; + VALUE pname, pmode, port, tmp; + + if (rb_scan_args(argc, argv, "11", &pname, &pmode) == 1) { + mode = "r"; + } + else if (FIXNUM_P(pmode)) { + mode = rb_io_modenum_mode(FIX2INT(pmode)); + } + else { + mode = rb_io_flags_mode(rb_io_mode_flags(StringValuePtr(pmode))); + } + tmp = rb_check_array_type(pname); + if (!NIL_P(tmp)) { + VALUE *argv = ALLOCA_N(VALUE, RARRAY(tmp)->len); + + MEMCPY(argv, RARRAY(tmp)->ptr, VALUE, RARRAY(tmp)->len); + port = pipe_open(RARRAY(tmp)->len, argv, mode); + pname = tmp; + } + else { + SafeStringValue(pname); + port = pipe_open(1, &pname, mode); + } + if (NIL_P(port)) { + /* child */ + if (rb_block_given_p()) { + rb_yield(Qnil); + rb_io_flush(rb_stdout); + rb_io_flush(rb_stderr); + _exit(0); + } + return Qnil; + } + RBASIC(port)->klass = klass; + if (rb_block_given_p()) { + return rb_ensure(rb_yield, port, io_close, port); + } + return port; +} + +static VALUE +rb_open_file(argc, argv, io) + int 
argc; + VALUE *argv; + VALUE io; +{ + VALUE fname, vmode, perm; + char *mode; + int flags, fmode; + + rb_scan_args(argc, argv, "12", &fname, &vmode, &perm); + FilePathValue(fname); + + if (FIXNUM_P(vmode) || !NIL_P(perm)) { + if (FIXNUM_P(vmode)) { + flags = FIX2INT(vmode); + } + else { + SafeStringValue(vmode); + flags = rb_io_mode_modenum(RSTRING(vmode)->ptr); + } + fmode = NIL_P(perm) ? 0666 : NUM2INT(perm); + + rb_file_sysopen_internal(io, RSTRING(fname)->ptr, flags, fmode); + } + else { + mode = NIL_P(vmode) ? "r" : StringValuePtr(vmode); + rb_file_open_internal(io, RSTRING(fname)->ptr, mode); + } + return io; +} + +/* + * call-seq: + * IO.open(fd, mode_string="r" ) => io + * IO.open(fd, mode_string="r" ) {|io| block } => obj + * + * With no associated block, <code>open</code> is a synonym for + * <code>IO::new</code>. If the optional code block is given, it will + * be passed <i>io</i> as an argument, and the IO object will + * automatically be closed when the block terminates. In this instance, + * <code>IO::open</code> returns the value of the block. + * + */ + +static VALUE +rb_io_s_open(argc, argv, klass) + int argc; + VALUE *argv; + VALUE klass; +{ + VALUE io = rb_class_new_instance(argc, argv, klass); + + if (rb_block_given_p()) { + return rb_ensure(rb_yield, io, io_close, io); + } + + return io; +} + +/* + * call-seq: + * IO.sysopen(path, [mode, [perm]]) => fixnum + * + * Opens the given path, returning the underlying file descriptor as a + * <code>Fixnum</code>. 
+ * + * IO.sysopen("testfile") #=> 3 + * + */ + +static VALUE +rb_io_s_sysopen(argc, argv) + int argc; + VALUE *argv; +{ + VALUE fname, vmode, perm; + int flags, fmode, fd; + char *path; + + rb_scan_args(argc, argv, "12", &fname, &vmode, &perm); + FilePathValue(fname); + + if (NIL_P(vmode)) flags = O_RDONLY; + else if (FIXNUM_P(vmode)) flags = FIX2INT(vmode); + else { + SafeStringValue(vmode); + flags = rb_io_mode_modenum(RSTRING(vmode)->ptr); + } + if (NIL_P(perm)) fmode = 0666; + else fmode = NUM2INT(perm); + + path = ALLOCA_N(char, strlen(RSTRING(fname)->ptr)+1); + strcpy(path, RSTRING(fname)->ptr); + fd = rb_sysopen(path, flags, fmode); + return INT2NUM(fd); +} + +/* + * call-seq: + * open(path [, mode [, perm]] ) => io or nil + * open(path [, mode [, perm]] ) {|io| block } => obj + * + * Creates an <code>IO</code> object connected to the given stream, + * file, or subprocess. + * + * If <i>path</i> does not start with a pipe character + * (``<code>|</code>''), treat it as the name of a file to open using + * the specified mode (defaulting to ``<code>r</code>''). (See the table + * of valid modes on page 331.) If a file is being created, its initial + * permissions may be set using the integer third parameter. + * + * If a block is specified, it will be invoked with the + * <code>File</code> object as a parameter, and the file will be + * automatically closed when the block terminates. The call + * returns the value of the block. + * + * If <i>path</i> starts with a pipe character, a subprocess is + * created, connected to the caller by a pair of pipes. The returned + * <code>IO</code> object may be used to write to the standard input + * and read from the standard output of this subprocess. If the command + * following the ``<code>|</code>'' is a single minus sign, Ruby forks, + * and this subprocess is connected to the parent. In the subprocess, + * the <code>open</code> call returns <code>nil</code>. 
If the command + * is not ``<code>-</code>'', the subprocess runs the command. If a + * block is associated with an <code>open("|-")</code> call, that block + * will be run twice---once in the parent and once in the child. The + * block parameter will be an <code>IO</code> object in the parent and + * <code>nil</code> in the child. The parent's <code>IO</code> object + * will be connected to the child's <code>$stdin</code> and + * <code>$stdout</code>. The subprocess will be terminated at the end + * of the block. + * + * open("testfile") do |f| + * print f.gets + * end + * + * <em>produces:</em> + * + * This is line one + * + * Open a subprocess and read its output: + * + * cmd = open("|date") + * print cmd.gets + * cmd.close + * + * <em>produces:</em> + * + * Wed Apr 9 08:56:31 CDT 2003 + * + * Open a subprocess running the same Ruby program: + * + * f = open("|-", "w+") + * if f == nil + * puts "in Child" + * exit + * else + * puts "Got: #{f.gets}" + * end + * + * <em>produces:</em> + * + * Got: in Child + * + * Open a subprocess using a block to receive the I/O object: + * + * open("|-") do |f| + * if f == nil + * puts "in Child" + * else + * puts "Got: #{f.gets}" + * end + * end + * + * <em>produces:</em> + * + * Got: in Child + */ + +static VALUE +rb_f_open(argc, argv) + int argc; + VALUE *argv; +{ + if (argc >= 1) { + ID to_open = rb_intern("to_open"); + + if (rb_respond_to(argv[0], to_open)) { + VALUE io = rb_funcall2(argv[0], to_open, argc-1, argv+1); + + if (rb_block_given_p()) { + return rb_ensure(rb_yield, io, io_close, io); + } + return io; + } + else { + VALUE tmp = rb_check_string_type(argv[0]); + if (!NIL_P(tmp)) { + char *str = StringValuePtr(tmp); + if (str && str[0] == '|') { + argv[0] = rb_str_new(str+1, RSTRING(tmp)->len-1); + OBJ_INFECT(argv[0], tmp); + return rb_io_s_popen(argc, argv, rb_cIO); + } + } + } + } + return rb_io_s_open(argc, argv, rb_cFile); +} + +static VALUE +rb_io_open(fname, mode) + char *fname, *mode; +{ + if (fname[0] == 
'|') { + VALUE cmd = rb_str_new2(fname+1); + return pipe_open(1, &cmd, mode); + } + else { + return rb_file_open(fname, mode); + } +} + +static VALUE +io_reopen(io, nfile) + VALUE io, nfile; +{ + OpenFile *fptr, *orig; + int fd, fd2; + off_t pos = 0; + + nfile = rb_io_get_io(nfile); + if (rb_safe_level() >= 4 && (!OBJ_TAINTED(io) || !OBJ_TAINTED(nfile))) { + rb_raise(rb_eSecurityError, "Insecure: can't reopen"); + } + GetOpenFile(io, fptr); + GetOpenFile(nfile, orig); + + if (fptr == orig) return io; +#if !defined __CYGWIN__ + if (IS_PREP_STDIO(fptr)) { + if ((fptr->mode & FMODE_READWRITE) != (orig->mode & FMODE_READWRITE)) { + rb_raise(rb_eArgError, + "%s can't change access mode from \"%s\" to \"%s\"", + PREP_STDIO_NAME(fptr), rb_io_flags_mode(fptr->mode), + rb_io_flags_mode(orig->mode)); + } + } +#endif + if (orig->mode & FMODE_READABLE) { + pos = io_tell(orig); + } + if (orig->mode & FMODE_WRITABLE) { + io_fflush(orig); + } + if (fptr->mode & FMODE_WRITABLE) { + io_fflush(fptr); + } + + /* copy OpenFile structure */ + fptr->mode = orig->mode; + fptr->pid = orig->pid; + fptr->lineno = orig->lineno; + if (fptr->path) free(fptr->path); + if (orig->path) fptr->path = strdup(orig->path); + else fptr->path = 0; + fptr->finalize = orig->finalize; + + fd = fptr->fd; + fd2 = orig->fd; + if (fd != fd2) { +#if !defined __CYGWIN__ + if (IS_PREP_STDIO(fptr)) { + /* need to keep stdio objects */ + if (dup2(fd2, fd) < 0) + rb_sys_fail(orig->path); + } + else { +#endif + if (fptr->stdio_file) + fclose(fptr->stdio_file); + else + close(fptr->fd); + fptr->stdio_file = 0; + fptr->fd = -1; + if (dup2(fd2, fd) < 0) + rb_sys_fail(orig->path); + fptr->fd = fd; +#if !defined __CYGWIN__ + } +#endif + rb_thread_fd_close(fd); + if ((orig->mode & FMODE_READABLE) && pos >= 0) { + if (io_seek(fptr, pos, SEEK_SET) < 0) { + rb_sys_fail(fptr->path); + } + if (io_seek(orig, pos, SEEK_SET) < 0) { + rb_sys_fail(orig->path); + } + } + } + + if (fptr->mode & FMODE_BINMODE) { + rb_io_binmode(io); + 
} + + RBASIC(io)->klass = RBASIC(nfile)->klass; + return io; +} + +/* + * call-seq: + * ios.reopen(other_IO) => ios + * ios.reopen(path, mode_str) => ios + * + * Reassociates <em>ios</em> with the I/O stream given in + * <i>other_IO</i> or to a new stream opened on <i>path</i>. This may + * dynamically change the actual class of this stream. + * + * f1 = File.new("testfile") + * f2 = File.new("testfile") + * f2.readlines[0] #=> "This is line one\n" + * f2.reopen(f1) #=> #<File:testfile> + * f2.readlines[0] #=> "This is line one\n" + */ + +static VALUE +rb_io_reopen(argc, argv, file) + int argc; + VALUE *argv; + VALUE file; +{ + VALUE fname, nmode; + char *mode; + OpenFile *fptr; + + rb_secure(4); + if (rb_scan_args(argc, argv, "11", &fname, &nmode) == 1) { + VALUE tmp = rb_io_check_io(fname); + if (!NIL_P(tmp)) { + return io_reopen(file, tmp); + } + } + + FilePathValue(fname); + rb_io_taint_check(file); + fptr = RFILE(file)->fptr; + if (!fptr) { + fptr = RFILE(file)->fptr = ALLOC(OpenFile); + MEMZERO(fptr, OpenFile, 1); + } + + if (!NIL_P(nmode)) { + int flags = rb_io_mode_flags(StringValuePtr(nmode)); + if (IS_PREP_STDIO(fptr) && + (fptr->mode & FMODE_READWRITE) != (flags & FMODE_READWRITE)) { + rb_raise(rb_eArgError, + "%s can't change access mode from \"%s\" to \"%s\"", + PREP_STDIO_NAME(fptr), rb_io_flags_mode(fptr->mode), + rb_io_flags_mode(flags)); + } + fptr->mode = flags; + } + + if (fptr->path) { + free(fptr->path); + fptr->path = 0; + } + + fptr->path = strdup(RSTRING(fname)->ptr); + mode = rb_io_flags_mode(fptr->mode); + if (fptr->fd < 0) { + fptr->fd = rb_sysopen(fptr->path, rb_io_mode_modenum(mode), 0666); + fptr->stdio_file = 0; + return file; + } + + if (fptr->stdio_file) { + if (freopen(RSTRING(fname)->ptr, mode, fptr->stdio_file) == 0) { + rb_sys_fail(fptr->path); + } + fptr->fd = fileno(fptr->stdio_file); +#ifdef USE_SETVBUF + if (setvbuf(fptr->stdio_file, NULL, _IOFBF, 0) != 0) + rb_warn("setvbuf() can't be honoured for %s", RSTRING(fname)->ptr); 
+#endif + } + else { + if (close(fptr->fd) < 0) + rb_sys_fail(fptr->path); + fptr->fd = -1; + fptr->fd = rb_sysopen(fptr->path, rb_io_mode_modenum(mode), 0666); + } + + return file; +} + +/* :nodoc: */ +static VALUE +rb_io_init_copy(dest, io) + VALUE dest, io; +{ + OpenFile *fptr, *orig; + int fd; + + io = rb_io_get_io(io); + if (dest == io) return dest; + GetOpenFile(io, orig); + MakeOpenFile(dest, fptr); + + rb_io_flush(io); + + /* copy OpenFile structure */ + fptr->mode = orig->mode; + fptr->pid = orig->pid; + fptr->lineno = orig->lineno; + if (orig->path) fptr->path = strdup(orig->path); + fptr->finalize = orig->finalize; + + fd = ruby_dup(orig->fd); + fptr->fd = fd; + io_seek(fptr, io_tell(orig), SEEK_SET); + if (fptr->mode & FMODE_BINMODE) { + rb_io_binmode(dest); + } + + return dest; +} + +/* + * call-seq: + * ios.printf(format_string [, obj, ...] ) => nil + * + * Formats and writes to <em>ios</em>, converting parameters under + * control of the format string. See <code>Kernel#sprintf</code> + * for details. + */ + +VALUE +rb_io_printf(argc, argv, out) + int argc; + VALUE argv[]; + VALUE out; +{ + rb_io_write(out, rb_f_sprintf(argc, argv)); + return Qnil; +} + +/* + * call-seq: + * printf(io, string [, obj ... ] ) => nil + * printf(string [, obj ... ] ) => nil + * + * Equivalent to: + * io.write(sprintf(string, obj, ...) + * or + * $stdout.write(sprintf(string, obj, ...) + */ + +static VALUE +rb_f_printf(argc, argv) + int argc; + VALUE argv[]; +{ + VALUE out; + + if (argc == 0) return Qnil; + if (TYPE(argv[0]) == T_STRING) { + out = rb_stdout; + } + else { + out = argv[0]; + argv++; + argc--; + } + rb_io_write(out, rb_f_sprintf(argc, argv)); + + return Qnil; +} + +/* + * call-seq: + * ios.print() => nil + * ios.print(obj, ...) => nil + * + * Writes the given object(s) to <em>ios</em>. The stream must be + * opened for writing. If the output record separator (<code>$\</code>) + * is not <code>nil</code>, it will be appended to the output. 
If no + * arguments are given, prints <code>$_</code>. Objects that aren't + * strings will be converted by calling their <code>to_s</code> method. + * With no argument, prints the contents of the variable <code>$_</code>. + * Returns <code>nil</code>. + * + * $stdout.print("This is ", 100, " percent.\n") + * + * <em>produces:</em> + * + * This is 100 percent. + */ + +VALUE +rb_io_print(argc, argv, out) + int argc; + VALUE *argv; + VALUE out; +{ + int i; + VALUE line; + + /* if no argument given, print `$_' */ + if (argc == 0) { + argc = 1; + line = rb_lastline_get(); + argv = &line; + } + for (i=0; i<argc; i++) { + if (!NIL_P(rb_output_fs) && i>0) { + rb_io_write(out, rb_output_fs); + } + switch (TYPE(argv[i])) { + case T_NIL: + rb_io_write(out, rb_str_new2("nil")); + break; + default: + rb_io_write(out, argv[i]); + break; + } + } + if (!NIL_P(rb_output_rs)) { + rb_io_write(out, rb_output_rs); + } + + return Qnil; +} + +/* + * call-seq: + * print(obj, ...) => nil + * + * Prints each object in turn to <code>$stdout</code>. If the output + * field separator (<code>$,</code>) is not +nil+, its + * contents will appear between each field. If the output record + * separator (<code>$\</code>) is not +nil+, it will be + * appended to the output. If no arguments are given, prints + * <code>$_</code>. Objects that aren't strings will be converted by + * calling their <code>to_s</code> method. + * + * print "cat", [1,2,3], 99, "\n" + * $, = ", " + * $\ = "\n" + * print "cat", [1,2,3], 99 + * + * <em>produces:</em> + * + * cat12399 + * cat, 1, 2, 3, 99 + */ + +static VALUE +rb_f_print(argc, argv) + int argc; + VALUE *argv; +{ + rb_io_print(argc, argv, rb_stdout); + return Qnil; +} + +/* + * call-seq: + * ios.putc(obj) => obj + * + * If <i>obj</i> is <code>Numeric</code>, write the character whose + * code is <i>obj</i>, otherwise write the first character of the + * string representation of <i>obj</i> to <em>ios</em>. 
+ * + * $stdout.putc "A" + * $stdout.putc 65 + * + * <em>produces:</em> + * + * AA + */ + +static VALUE +rb_io_putc(io, ch) + VALUE io, ch; +{ + char c = NUM2CHR(ch); + + rb_io_write(io, rb_str_new(&c, 1)); + return ch; +} + +/* + * call-seq: + * putc(int) => int + * + * Equivalent to: + * + * $stdout.putc(int) + */ + +static VALUE +rb_f_putc(recv, ch) + VALUE recv, ch; +{ + return rb_io_putc(rb_stdout, ch); +} + +static VALUE +io_puts_ary(ary, out, recur) + VALUE ary, out; +{ + VALUE tmp; + long i; + + for (i=0; i<RARRAY(ary)->len; i++) { + tmp = RARRAY(ary)->ptr[i]; + if (recur) { + tmp = rb_str_new2("[...]"); + } + rb_io_puts(1, &tmp, out); + } + return Qnil; +} + +/* + * call-seq: + * ios.puts(obj, ...) => nil + * + * Writes the given objects to <em>ios</em> as with + * <code>IO#print</code>. Writes a record separator (typically a + * newline) after any that do not already end with a newline sequence. + * If called with an array argument, writes each element on a new line. + * If called without arguments, outputs a single record separator. + * + * $stdout.puts("this", "is", "a", "test") + * + * <em>produces:</em> + * + * this + * is + * a + * test + */ + +VALUE +rb_io_puts(argc, argv, out) + int argc; + VALUE *argv; + VALUE out; +{ + int i; + VALUE line; + + /* if no argument given, print newline. */ + if (argc == 0) { + rb_io_write(out, rb_default_rs); + return Qnil; + } + for (i=0; i<argc; i++) { + if (NIL_P(argv[i])) { + line = rb_str_new2("nil"); + } + else { + line = rb_check_array_type(argv[i]); + if (!NIL_P(line)) { + rb_exec_recursive(io_puts_ary, line, out); + continue; + } + line = rb_obj_as_string(argv[i]); + } + rb_io_write(out, line); + if (RSTRING(line)->len == 0 || + RSTRING(line)->ptr[RSTRING(line)->len-1] != '\n') { + rb_io_write(out, rb_default_rs); + } + } + + return Qnil; +} + +/* + * call-seq: + * puts(obj, ...) => nil + * + * Equivalent to + * + * $stdout.puts(obj, ...) 
+ */ + +static VALUE +rb_f_puts(argc, argv) + int argc; + VALUE *argv; +{ + rb_io_puts(argc, argv, rb_stdout); + return Qnil; +} + +void +rb_p(obj) /* for debug print within C code */ + VALUE obj; +{ + rb_io_write(rb_stdout, rb_obj_as_string(rb_inspect(obj))); + rb_io_write(rb_stdout, rb_default_rs); +} + +/* + * call-seq: + * p(obj, ...) => nil + * + * For each object, directly writes + * _obj_.+inspect+ followed by the current output + * record separator to the program's standard output. + * + * S = Struct.new(:name, :state) + * s = S['dave', 'TX'] + * p s + * + * <em>produces:</em> + * + * #<S name="dave", state="TX"> + */ + +static VALUE +rb_f_p(argc, argv) + int argc; + VALUE *argv; +{ + int i; + + for (i=0; i<argc; i++) { + rb_p(argv[i]); + } + if (TYPE(rb_stdout) == T_FILE) { + rb_io_flush(rb_stdout); + } + return Qnil; +} + +/* + * call-seq: + * obj.display(port=$>) => nil + * + * Prints <i>obj</i> on the given port (default <code>$></code>). + * Equivalent to: + * + * def display(port=$>) + * port.write self + * end + * + * For example: + * + * 1.display + * "cat".display + * [ 4, 5, 6 ].display + * puts + * + * <em>produces:</em> + * + * 1cat456 + */ + +static VALUE +rb_obj_display(argc, argv, self) + int argc; + VALUE *argv; + VALUE self; +{ + VALUE out; + + if (rb_scan_args(argc, argv, "01", &out) == 0) { + out = rb_stdout; + } + + rb_io_write(out, self); + + return Qnil; +} + +void +rb_write_error2(mesg, len) + const char *mesg; + long len; +{ + rb_io_write(rb_stderr, rb_str_new(mesg, len)); +} + +void +rb_write_error(mesg) + const char *mesg; +{ + rb_write_error2(mesg, strlen(mesg)); +} + +static void +must_respond_to(mid, val, id) + ID mid; + VALUE val; + ID id; +{ + if (!rb_respond_to(val, mid)) { + rb_raise(rb_eTypeError, "%s must have %s method, %s given", + rb_id2name(id), rb_id2name(mid), + rb_obj_classname(val)); + } +} + +static void +stdout_setter(val, id, variable) + VALUE val; + ID id; + VALUE *variable; +{ + must_respond_to(id_write, val, 
id); + *variable = val; +} + +static void +defout_setter(val, id, variable) + VALUE val; + ID id; + VALUE *variable; +{ + stdout_setter(val, id, variable); + rb_warn("$defout is obsolete; use $stdout instead"); +} + +static void +deferr_setter(val, id, variable) + VALUE val; + ID id; + VALUE *variable; +{ + stdout_setter(val, id, variable); + rb_warn("$deferr is obsolete; use $stderr instead"); +} + +static VALUE +prep_io(fd, mode, klass, path) + int fd; + int mode; + VALUE klass; + const char *path; +{ + OpenFile *fp; + VALUE io = io_alloc(klass); + + MakeOpenFile(io, fp); + fp->fd = fd; +#ifdef __CYGWIN__ + if (!isatty(fd)) { + mode |= O_BINARY; + setmode(fd, O_BINARY); + } +#endif + fp->mode = mode; + io_check_tty(fp); + if (path) fp->path = strdup(path); + + return io; +} + +static VALUE +prep_stdio(f, mode, klass, path) + FILE *f; + int mode; + VALUE klass; + const char *path; +{ + OpenFile *fptr; + VALUE io = prep_io(fileno(f), mode|FMODE_PREP, klass, path); + + GetOpenFile(io, fptr); + fptr->stdio_file = f; + + return io; +} + +FILE *rb_io_stdio_file(OpenFile *fptr) +{ + if (!fptr->stdio_file) { + fptr->stdio_file = rb_fdopen(fptr->fd, rb_io_flags_mode(fptr->mode)); + } + return fptr->stdio_file; +} + +/* + * call-seq: + * IO.new(fd, mode) => io + * + * Returns a new <code>IO</code> object (a stream) for the given + * <code>IO</code> object or integer file descriptor and mode + * string. See also <code>IO#fileno</code> and + * <code>IO::for_fd</code>. 
+ * + * puts IO.new($stdout).fileno # => 1 + * + * a = IO.new(2,"w") # '2' is standard error + * $stderr.puts "Hello" + * a.puts "World" + * + * <em>produces:</em> + * + * Hello + * World + */ + +static VALUE +rb_io_initialize(argc, argv, io) + int argc; + VALUE *argv; + VALUE io; +{ + VALUE fnum, mode, orig; + OpenFile *fp, *ofp = NULL; + int fd, flags, fmode; + + rb_secure(4); + rb_scan_args(argc, argv, "11", &fnum, &mode); + if (argc == 2) { + if (FIXNUM_P(mode)) { + flags = FIX2LONG(mode); + } + else { + SafeStringValue(mode); + flags = rb_io_mode_modenum(RSTRING(mode)->ptr); + } + } + orig = rb_io_check_io(fnum); + if (NIL_P(orig)) { + fd = NUM2INT(fnum); + if (argc != 2) { +#if defined(HAVE_FCNTL) && defined(F_GETFL) + flags = fcntl(fd, F_GETFL); + if (flags == -1) rb_sys_fail(0); +#else + flags = O_RDONLY; +#endif + } + MakeOpenFile(io, fp); + fp->fd = fd; + fp->mode = rb_io_modenum_flags(flags); + io_check_tty(fp); + } + else if (RFILE(io)->fptr) { + rb_raise(rb_eRuntimeError, "reinitializing IO"); + } + else { + GetOpenFile(orig, ofp); + if (ofp->refcnt == LONG_MAX) { + VALUE s = rb_inspect(orig); + rb_raise(rb_eIOError, "too many shared IO for %s", StringValuePtr(s)); + } + if (argc == 2) { + fmode = rb_io_modenum_flags(flags); + if ((ofp->mode ^ fmode) & (FMODE_READWRITE|FMODE_BINMODE)) { + if (FIXNUM_P(mode)) { + rb_raise(rb_eArgError, "incompatible mode 0%o", flags); + } + else { + rb_raise(rb_eArgError, "incompatible mode \"%s\"", RSTRING(mode)->ptr); + } + } + } + ofp->refcnt++; + RFILE(io)->fptr = ofp; + } + + return io; +} + + +/* + * call-seq: + * File.new(filename, mode="r") => file + * File.new(filename [, mode [, perm]]) => file + * + + * Opens the file named by _filename_ according to + * _mode_ (default is ``r'') and returns a new + * <code>File</code> object. See the description of class +IO+ for + * a description of _mode_. 
The file mode may optionally be + * specified as a +Fixnum+ by _or_-ing together the + * flags (O_RDONLY etc, again described under +IO+). Optional + * permission bits may be given in _perm_. These mode and permission + * bits are platform dependent; on Unix systems, see + * <code>open(2)</code> for details. + * + * f = File.new("testfile", "r") + * f = File.new("newfile", "w+") + * f = File.new("newfile", File::CREAT|File::TRUNC|File::RDWR, 0644) + */ + +static VALUE +rb_file_initialize(argc, argv, io) + int argc; + VALUE *argv; + VALUE io; +{ + if (RFILE(io)->fptr) { + rb_raise(rb_eRuntimeError, "reinitializing File"); + } + if (0 < argc && argc < 3) { + VALUE fd = rb_check_convert_type(argv[0], T_FIXNUM, "Fixnum", "to_int"); + + if (!NIL_P(fd)) { + argv[0] = fd; + return rb_io_initialize(argc, argv, io); + } + } + rb_open_file(argc, argv, io); + + return io; +} + +/* + * call-seq: + * IO.new(fd, mode_string) => io + * + * Returns a new <code>IO</code> object (a stream) for the given + * integer file descriptor and mode string. See also + * <code>IO#fileno</code> and <code>IO::for_fd</code>. + * + * a = IO.new(2,"w") # '2' is standard error + * $stderr.puts "Hello" + * a.puts "World" + * + * <em>produces:</em> + * + * Hello + * World + */ + +static VALUE +rb_io_s_new(argc, argv, klass) + int argc; + VALUE *argv; + VALUE klass; +{ + if (rb_block_given_p()) { + char *cname = rb_class2name(klass); + + rb_warn("%s::new() does not take block; use %s::open() instead", + cname, cname); + } + return rb_class_new_instance(argc, argv, klass); +} + + +/* + * call-seq: + * IO.for_fd(fd, mode) => io + * + * Synonym for <code>IO::new</code>. 
+ * + */ + +static VALUE +rb_io_s_for_fd(argc, argv, klass) + int argc; + VALUE *argv; + VALUE klass; +{ + VALUE io = rb_obj_alloc(klass); + rb_io_initialize(argc, argv, io); + return io; +} + +static int binmode = 0; + +static VALUE +argf_forward(argc, argv) + int argc; + VALUE *argv; +{ + return rb_funcall3(current_file, rb_frame_this_func(), argc, argv); +} + +#define ARGF_FORWARD(argc, argv) do {\ + if (TYPE(current_file) != T_FILE)\ + return argf_forward(argc, argv);\ +} while (0) +#define NEXT_ARGF_FORWARD(argc, argv) do {\ + if (!next_argv()) return Qnil;\ + ARGF_FORWARD(argc, argv);\ +} while (0) + +static void +argf_close(file) + VALUE file; +{ + if (TYPE(file) == T_FILE) + rb_io_close(file); + else + rb_funcall3(file, rb_intern("close"), 0, 0); +} + +static int +next_argv() +{ + extern VALUE rb_argv; + char *fn; + OpenFile *fptr; + int stdout_binmode = 0; + + if (TYPE(rb_stdout) == T_FILE) { + GetOpenFile(rb_stdout, fptr); + if (fptr->mode & FMODE_BINMODE) + stdout_binmode = 1; + } + + if (init_p == 0) { + if (RARRAY(rb_argv)->len > 0) { + next_p = 1; + } + else { + next_p = -1; + } + init_p = 1; + gets_lineno = 0; + } + + if (next_p == 1) { + next_p = 0; + retry: + if (RARRAY(rb_argv)->len > 0) { + filename = rb_ary_shift(rb_argv); + fn = StringValuePtr(filename); + if (strlen(fn) == 1 && fn[0] == '-') { + current_file = rb_stdin; + if (ruby_inplace_mode) { + rb_warn("Can't do inplace edit for stdio; skipping"); + goto retry; + } + } + else { + int fr = rb_sysopen(fn, O_RDONLY, 0); + + if (ruby_inplace_mode) { + struct stat st, st2; + VALUE str; + int fw; + + if (TYPE(rb_stdout) == T_FILE && rb_stdout != orig_stdout) { + rb_io_close(rb_stdout); + } + fstat(fr, &st); + if (*ruby_inplace_mode) { + str = rb_str_new2(fn); +#ifdef NO_LONG_FNAME + ruby_add_suffix(str, ruby_inplace_mode); +#else + rb_str_cat2(str, ruby_inplace_mode); +#endif +#ifdef NO_SAFE_RENAME + (void)close(fr); + (void)unlink(RSTRING(str)->ptr); + (void)rename(fn, RSTRING(str)->ptr); + fr 
= rb_sysopen(RSTRING(str)->ptr, O_RDONLY, 0); +#else + if (rename(fn, RSTRING(str)->ptr) < 0) { + rb_warn("Can't rename %s to %s: %s, skipping file", + fn, RSTRING(str)->ptr, strerror(errno)); + close(fr); + goto retry; + } +#endif + } + else { +#ifdef NO_SAFE_RENAME + rb_fatal("Can't do inplace edit without backup"); +#else + if (unlink(fn) < 0) { + rb_warn("Can't remove %s: %s, skipping file", + fn, strerror(errno)); + close(fr); + goto retry; + } +#endif + } + fw = rb_sysopen(fn, O_WRONLY|O_CREAT|O_TRUNC, 0666); +#ifndef NO_SAFE_RENAME + fstat(fw, &st2); +#ifdef HAVE_FCHMOD + fchmod(fw, st.st_mode); +#else + chmod(fn, st.st_mode); +#endif + if (st.st_uid!=st2.st_uid || st.st_gid!=st2.st_gid) { + fchown(fw, st.st_uid, st.st_gid); + } +#endif + rb_stdout = prep_io(fw, FMODE_WRITABLE, rb_cFile, fn); + if (stdout_binmode) rb_io_binmode(rb_stdout); + } + current_file = prep_io(fr, FMODE_READABLE, rb_cFile, fn); + } + if (binmode) rb_io_binmode(current_file); + } + else { + next_p = 1; + return Qfalse; + } + } + else if (next_p == -1) { + current_file = rb_stdin; + filename = rb_str_new2("-"); + if (ruby_inplace_mode) { + rb_warn("Can't do inplace edit for stdio"); + rb_stdout = orig_stdout; + } + } + return Qtrue; +} + +static VALUE +argf_getline(argc, argv) + int argc; + VALUE *argv; +{ + VALUE line; + + retry: + if (!next_argv()) return Qnil; + if (argc == 0 && rb_rs == rb_default_rs) { + line = rb_io_gets(current_file); + } + else { + VALUE rs; + + if (argc == 0) { + rs = rb_rs; + } + else { + rb_scan_args(argc, argv, "1", &rs); + if (!NIL_P(rs)) StringValue(rs); + } + line = rb_io_getline(rs, current_file); + } + if (NIL_P(line) && next_p != -1) { + argf_close(current_file); + next_p = 1; + goto retry; + } + if (!NIL_P(line)) { + gets_lineno++; + lineno = INT2FIX(gets_lineno); + } + return line; +} + +/* + * call-seq: + * gets(separator=$/) => string or nil + * + * Returns (and assigns to <code>$_</code>) the next line from the list + * of files in +ARGV+ (or 
<code>$*</code>), or from standard + * input if no files are present on the command line. Returns + * +nil+ at end of file. The optional argument specifies the + * record separator. The separator is included with the contents of + * each record. A separator of +nil+ reads the entire + * contents, and a zero-length separator reads the input one paragraph + * at a time, where paragraphs are divided by two consecutive newlines. + * If multiple filenames are present in +ARGV+, + * +gets(nil)+ will read the contents one file at a time. + * + * ARGV << "testfile" + * print while gets + * + * <em>produces:</em> + * + * This is line one + * This is line two + * This is line three + * And so on... + * + * The style of programming using <code>$_</code> as an implicit + * parameter is gradually losing favor in the Ruby community. + */ + +static VALUE +rb_f_gets(argc, argv) + int argc; + VALUE *argv; +{ + VALUE line; + + if (!next_argv()) return Qnil; + if (TYPE(current_file) != T_FILE) { + line = rb_funcall3(current_file, rb_intern("gets"), argc, argv); + } + else { + line = argf_getline(argc, argv); + } + rb_lastline_set(line); + return line; +} + +VALUE +rb_gets() +{ + VALUE line; + + if (rb_rs != rb_default_rs) { + return rb_f_gets(0, 0); + } + + retry: + if (!next_argv()) return Qnil; + line = rb_io_gets(current_file); + if (NIL_P(line) && next_p != -1) { + argf_close(current_file); + next_p = 1; + goto retry; + } + rb_lastline_set(line); + if (!NIL_P(line)) { + gets_lineno++; + lineno = INT2FIX(gets_lineno); + } + + return line; +} + +/* + * call-seq: + * readline(separator=$/) => string + * + * Equivalent to <code>Kernel::gets</code>, except + * +readline+ raises +EOFError+ at end of file. 
+ */ + +static VALUE +rb_f_readline(argc, argv) + int argc; + VALUE *argv; +{ + VALUE line; + + if (!next_argv()) rb_eof_error(); + ARGF_FORWARD(argc, argv); + line = rb_f_gets(argc, argv); + if (NIL_P(line)) { + rb_eof_error(); + } + + return line; +} + +/* + * obsolete + */ +static VALUE +rb_f_getc() +{ + rb_warn("getc is obsolete; use STDIN.getc instead"); + if (TYPE(rb_stdin) != T_FILE) { + return rb_funcall3(rb_stdin, rb_intern("getc"), 0, 0); + } + return rb_io_getc(rb_stdin); +} + +/* + * call-seq: + * readlines(separator=$/) => array + * + * Returns an array containing the lines returned by calling + * <code>Kernel.gets(<i>separator</i>)</code> until the end of file. + */ + +static VALUE +rb_f_readlines(argc, argv) + int argc; + VALUE *argv; +{ + VALUE line, ary; + + NEXT_ARGF_FORWARD(argc, argv); + ary = rb_ary_new(); + while (!NIL_P(line = argf_getline(argc, argv))) { + rb_ary_push(ary, line); + } + + return ary; +} + +/* + * call-seq: + * `cmd` => string + * + * Returns the standard output of running _cmd_ in a subshell. + * The built-in syntax <code>%x{...}</code> uses + * this method. Sets <code>$?</code> to the process status. + * + * `date` #=> "Wed Apr 9 08:56:30 CDT 2003\n" + * `ls testdir`.split[1] #=> "main.rb" + * `echo oops && exit 99` #=> "oops\n" + * $?.exitstatus #=> 99 + */ + +static VALUE +rb_f_backquote(obj, str) + VALUE obj, str; +{ + VALUE port, result; + OpenFile *fptr; + + SafeStringValue(str); + port = pipe_open(1, &str, "r"); + if (NIL_P(port)) return rb_str_new(0,0); + + GetOpenFile(port, fptr); + result = read_all(fptr, remain_size(fptr), Qnil); + rb_io_close(port); + + return result; +} + +#ifdef HAVE_SYS_SELECT_H +#include <sys/select.h> +#endif + +/* + * call-seq: + * IO.select(read_array + * [, write_array + * [, error_array + * [, timeout]]] ) => array or nil + * + * See <code>Kernel#select</code>. 
+ */ + +static VALUE +rb_f_select(argc, argv, obj) + int argc; + VALUE *argv; + VALUE obj; +{ + VALUE read, write, except, timeout, res, list; + fd_set rset, wset, eset, pset; + fd_set *rp, *wp, *ep; + struct timeval *tp, timerec; + OpenFile *fptr; + long i; + int max = 0, n; + int interrupt_flag = 0; + int pending = 0; + + rb_scan_args(argc, argv, "13", &read, &write, &except, &timeout); + if (NIL_P(timeout)) { + tp = 0; + } + else { + timerec = rb_time_interval(timeout); + tp = &timerec; + } + + FD_ZERO(&pset); + if (!NIL_P(read)) { + Check_Type(read, T_ARRAY); + rp = &rset; + FD_ZERO(rp); + for (i=0; i<RARRAY(read)->len; i++) { + GetOpenFile(rb_io_get_io(RARRAY(read)->ptr[i]), fptr); + FD_SET(fptr->fd, rp); + if (READ_DATA_PENDING(fptr)) { /* check for buffered data */ + pending++; + FD_SET(fptr->fd, &pset); + } + if (max < fptr->fd) max = fptr->fd; + } + if (pending) { /* no blocking if there's buffered data */ + timerec.tv_sec = timerec.tv_usec = 0; + tp = &timerec; + } + } + else + rp = 0; + + if (!NIL_P(write)) { + Check_Type(write, T_ARRAY); + wp = &wset; + FD_ZERO(wp); + for (i=0; i<RARRAY(write)->len; i++) { + GetOpenFile(rb_io_get_io(RARRAY(write)->ptr[i]), fptr); + FD_SET(fptr->fd, wp); + if (max < fptr->fd) max = fptr->fd; + } + } + else + wp = 0; + + if (!NIL_P(except)) { + Check_Type(except, T_ARRAY); + ep = &eset; + FD_ZERO(ep); + for (i=0; i<RARRAY(except)->len; i++) { + GetOpenFile(rb_io_get_io(RARRAY(except)->ptr[i]), fptr); + FD_SET(fptr->fd, ep); + if (max < fptr->fd) max = fptr->fd; + } + } + else { + ep = 0; + } + + max++; + + n = rb_thread_select(max, rp, wp, ep, tp); + if (n < 0) { + rb_sys_fail(0); + } + if (!pending && n == 0) return Qnil; /* returns nil on timeout */ + + res = rb_ary_new2(3); + rb_ary_push(res, rp?rb_ary_new():rb_ary_new2(0)); + rb_ary_push(res, wp?rb_ary_new():rb_ary_new2(0)); + rb_ary_push(res, ep?rb_ary_new():rb_ary_new2(0)); + + if (interrupt_flag == 0) { + if (rp) { + list = RARRAY(res)->ptr[0]; + for (i=0; i< 
RARRAY(read)->len; i++) { + GetOpenFile(rb_io_get_io(RARRAY(read)->ptr[i]), fptr); + if (FD_ISSET(fptr->fd, rp) + || FD_ISSET(fptr->fd, &pset)) { + rb_ary_push(list, rb_ary_entry(read, i)); + } + } + } + + if (wp) { + list = RARRAY(res)->ptr[1]; + for (i=0; i< RARRAY(write)->len; i++) { + GetOpenFile(rb_io_get_io(RARRAY(write)->ptr[i]), fptr); + if (FD_ISSET(fptr->fd, wp)) { + rb_ary_push(list, rb_ary_entry(write, i)); + } + } + } + + if (ep) { + list = RARRAY(res)->ptr[2]; + for (i=0; i< RARRAY(except)->len; i++) { + GetOpenFile(rb_io_get_io(RARRAY(except)->ptr[i]), fptr); + if (FD_ISSET(fptr->fd, ep)) { + rb_ary_push(list, rb_ary_entry(except, i)); + } + } + } + } + + return res; /* returns an empty array on interrupt */ +} + +#if !defined(MSDOS) && !defined(__human68k__) +static int +io_cntl(fd, cmd, narg, io_p) + int fd, cmd, io_p; + long narg; +{ + int retval; + +#ifdef HAVE_FCNTL + TRAP_BEG; +# if defined(__CYGWIN__) + retval = io_p?ioctl(fd, cmd, (void*)narg):fcntl(fd, cmd, narg); +# else + retval = io_p?ioctl(fd, cmd, narg):fcntl(fd, cmd, narg); +# endif + TRAP_END; +#else + if (!io_p) { + rb_notimplement(); + } + TRAP_BEG; + retval = ioctl(fd, cmd, narg); + TRAP_END; +#endif + return retval; +} +#endif + +static VALUE +rb_io_ctl(io, req, arg, io_p) + VALUE io, req, arg; + int io_p; +{ +#if !defined(MSDOS) && !defined(__human68k__) + int cmd = NUM2ULONG(req); + OpenFile *fptr; + long len = 0; + long narg = 0; + int retval; + + rb_secure(2); + + if (NIL_P(arg) || arg == Qfalse) { + narg = 0; + } + else if (FIXNUM_P(arg)) { + narg = FIX2LONG(arg); + } + else if (arg == Qtrue) { + narg = 1; + } + else { + VALUE tmp = rb_check_string_type(arg); + + if (NIL_P(tmp)) { + narg = NUM2LONG(arg); + } + else { + arg = tmp; +#ifdef IOCPARM_MASK +#ifndef IOCPARM_LEN +#define IOCPARM_LEN(x) (((x) >> 16) & IOCPARM_MASK) +#endif +#endif +#ifdef IOCPARM_LEN + len = IOCPARM_LEN(cmd); /* on BSDish systems we're safe */ +#else + len = 256; /* otherwise guess at what's safe */ 
+#endif + rb_str_modify(arg); + + if (len <= RSTRING(arg)->len) { + len = RSTRING(arg)->len; + } + if (RSTRING(arg)->len < len) { + rb_str_resize(arg, len+1); + } + RSTRING(arg)->ptr[len] = 17; /* a little sanity check here */ + narg = (long)RSTRING(arg)->ptr; + } + } + GetOpenFile(io, fptr); + retval = io_cntl(fptr->fd, cmd, narg, io_p); + if (retval < 0) rb_sys_fail(fptr->path); + if (TYPE(arg) == T_STRING && RSTRING(arg)->ptr[len] != 17) { + rb_raise(rb_eArgError, "return value overflowed string"); + } + + return INT2NUM(retval); +#else + rb_notimplement(); + return Qnil; /* not reached */ +#endif +} + + +/* + * call-seq: + * ios.ioctl(integer_cmd, arg) => integer + * + * Provides a mechanism for issuing low-level commands to control or + * query I/O devices. Arguments and results are platform dependent. If + * <i>arg</i> is a number, its value is passed directly. If it is a + * string, it is interpreted as a binary sequence of bytes. On Unix + * platforms, see <code>ioctl(2)</code> for details. Not implemented on + * all platforms. + */ + +static VALUE +rb_io_ioctl(argc, argv, io) + int argc; + VALUE *argv; + VALUE io; +{ + VALUE req, arg; + + rb_scan_args(argc, argv, "11", &req, &arg); + return rb_io_ctl(io, req, arg, 1); +} + +/* + * call-seq: + * ios.fcntl(integer_cmd, arg) => integer + * + * Provides a mechanism for issuing low-level commands to control or + * query file-oriented I/O streams. Arguments and results are platform + * dependent. If <i>arg</i> is a number, its value is passed + * directly. If it is a string, it is interpreted as a binary sequence + * of bytes (<code>Array#pack</code> might be a useful way to build this + * string). On Unix platforms, see <code>fcntl(2)</code> for details. + * Not implemented on all platforms. 
+ */ + +static VALUE +rb_io_fcntl(argc, argv, io) + int argc; + VALUE *argv; + VALUE io; +{ +#ifdef HAVE_FCNTL + VALUE req, arg; + + rb_scan_args(argc, argv, "11", &req, &arg); + return rb_io_ctl(io, req, arg, 0); +#else + rb_notimplement(); + return Qnil; /* not reached */ +#endif +} + +/* + * call-seq: + * syscall(fixnum [, args...]) => integer + * + * Calls the operating system function identified by _fixnum_, + * passing in the arguments, which must be either +String+ + * objects, or +Integer+ objects that ultimately fit within + * a native +long+. Up to nine parameters may be passed (14 + * on the Atari-ST). The function identified by _fixnum_ is system + * dependent. On some Unix systems, the numbers may be obtained from a + * header file called <code>syscall.h</code>. + * + * syscall 4, 1, "hello\n", 6 # '4' is write(2) on our box + * + * <em>produces:</em> + * + * hello + */ + +static VALUE +rb_f_syscall(argc, argv) + int argc; + VALUE *argv; +{ +#if defined(HAVE_SYSCALL) && !defined(__CHECKER__) +#ifdef atarist + unsigned long arg[14]; /* yes, we really need that many ! */ +#else + unsigned long arg[8]; +#endif + int retval = -1; + int i = 1; + int items = argc - 1; + + /* This probably won't work on machines where sizeof(long) != sizeof(int) + * or where sizeof(long) != sizeof(char*). But such machines will + * not likely have syscall implemented either, so who cares? 
+ */ + + rb_secure(2); + if (argc == 0) + rb_raise(rb_eArgError, "too few arguments for syscall"); + arg[0] = NUM2LONG(argv[0]); argv++; + while (items--) { + VALUE v = rb_check_string_type(*argv); + + if (!NIL_P(v)) { + StringValue(v); + rb_str_modify(v); + arg[i] = (unsigned long)RSTRING(v)->ptr; + } + else { + arg[i] = (unsigned long)NUM2LONG(*argv); + } + argv++; + i++; + } + TRAP_BEG; + switch (argc) { + case 1: + retval = syscall(arg[0]); + break; + case 2: + retval = syscall(arg[0],arg[1]); + break; + case 3: + retval = syscall(arg[0],arg[1],arg[2]); + break; + case 4: + retval = syscall(arg[0],arg[1],arg[2],arg[3]); + break; + case 5: + retval = syscall(arg[0],arg[1],arg[2],arg[3],arg[4]); + break; + case 6: + retval = syscall(arg[0],arg[1],arg[2],arg[3],arg[4],arg[5]); + break; + case 7: + retval = syscall(arg[0],arg[1],arg[2],arg[3],arg[4],arg[5],arg[6]); + break; + case 8: + retval = syscall(arg[0],arg[1],arg[2],arg[3],arg[4],arg[5],arg[6], + arg[7]); + break; +#ifdef atarist + case 9: + retval = syscall(arg[0],arg[1],arg[2],arg[3],arg[4],arg[5],arg[6], + arg[7], arg[8]); + break; + case 10: + retval = syscall(arg[0],arg[1],arg[2],arg[3],arg[4],arg[5],arg[6], + arg[7], arg[8], arg[9]); + break; + case 11: + retval = syscall(arg[0],arg[1],arg[2],arg[3],arg[4],arg[5],arg[6], + arg[7], arg[8], arg[9], arg[10]); + break; + case 12: + retval = syscall(arg[0],arg[1],arg[2],arg[3],arg[4],arg[5],arg[6], + arg[7], arg[8], arg[9], arg[10], arg[11]); + break; + case 13: + retval = syscall(arg[0],arg[1],arg[2],arg[3],arg[4],arg[5],arg[6], + arg[7], arg[8], arg[9], arg[10], arg[11], arg[12]); + break; + case 14: + retval = syscall(arg[0],arg[1],arg[2],arg[3],arg[4],arg[5],arg[6], + arg[7], arg[8], arg[9], arg[10], arg[11], arg[12], arg[13]); + break; +#endif /* atarist */ + } + TRAP_END; + if (retval < 0) rb_sys_fail(0); + return INT2NUM(retval); +#else + rb_notimplement(); + return Qnil; /* not reached */ +#endif +} + +static VALUE io_new_instance _((VALUE)); 
+static VALUE +io_new_instance(args) + VALUE args; +{ + return rb_class_new_instance(2, (VALUE*)args+1, *(VALUE*)args); +} + +/* + * call-seq: + * IO.pipe -> array + * + * Creates a pair of pipe endpoints (connected to each other) and + * returns them as a two-element array of <code>IO</code> objects: + * <code>[</code> <i>read_file</i>, <i>write_file</i> <code>]</code>. Not + * available on all platforms. + * + * In the example below, the two processes close the ends of the pipe + * that they are not using. This is not just a cosmetic nicety. The + * read end of a pipe will not generate an end of file condition if + * there are any writers with the pipe still open. In the case of the + * parent process, the <code>rd.read</code> will never return if it + * does not first issue a <code>wr.close</code>. + * + * rd, wr = IO.pipe + * + * if fork + * wr.close + * puts "Parent got: <#{rd.read}>" + * rd.close + * Process.wait + * else + * rd.close + * puts "Sending message to parent" + * wr.write "Hi Dad" + * wr.close + * end + * + * <em>produces:</em> + * + * Sending message to parent + * Parent got: <Hi Dad> + */ + +static VALUE +rb_io_s_pipe(klass) + VALUE klass; +{ +#ifndef __human68k__ + int pipes[2], state; + VALUE r, w, args[3]; + +#ifdef _WIN32 + if (_pipe(pipes, 1024, O_BINARY) == -1) +#else + if (pipe(pipes) == -1) +#endif + rb_sys_fail(0); + + args[0] = klass; + args[1] = INT2NUM(pipes[0]); + args[2] = INT2FIX(O_RDONLY); + r = rb_protect(io_new_instance, (VALUE)args, &state); + if (state) { + close(pipes[0]); + close(pipes[1]); + rb_jump_tag(state); + } + args[1] = INT2NUM(pipes[1]); + args[2] = INT2FIX(O_WRONLY); + w = rb_protect(io_new_instance, (VALUE)args, &state); + if (state) { + close(pipes[1]); + if (!NIL_P(r)) rb_io_close(r); + rb_jump_tag(state); + } + rb_io_synchronized(RFILE(w)->fptr); + + return rb_assoc_new(r, w); +#else + rb_notimplement(); + return Qnil; /* not reached */ +#endif +} + +struct foreach_arg { + int argc; + VALUE sep; + VALUE io; 
+}; + +static VALUE +io_s_foreach(arg) + struct foreach_arg *arg; +{ + VALUE str; + + while (!NIL_P(str = rb_io_getline(arg->sep, arg->io))) { + rb_yield(str); + } + return Qnil; +} + +/* + * call-seq: + * IO.foreach(name, sep_string=$/) {|line| block } => nil + * + * Executes the block for every line in the named I/O port, where lines + * are separated by <em>sep_string</em>. + * + * IO.foreach("testfile") {|x| print "GOT ", x } + * + * <em>produces:</em> + * + * GOT This is line one + * GOT This is line two + * GOT This is line three + * GOT And so on... + */ + +static VALUE +rb_io_s_foreach(argc, argv) + int argc; + VALUE *argv; +{ + VALUE fname; + struct foreach_arg arg; + + rb_scan_args(argc, argv, "11", &fname, &arg.sep); + FilePathValue(fname); + if (argc == 1) { + arg.sep = rb_default_rs; + } + else if (!NIL_P(arg.sep)) { + StringValue(arg.sep); + } + arg.io = rb_io_open(RSTRING(fname)->ptr, "r"); + if (NIL_P(arg.io)) return Qnil; + + return rb_ensure(io_s_foreach, (VALUE)&arg, rb_io_close, arg.io); +} + +static VALUE +io_s_readlines(arg) + struct foreach_arg *arg; +{ + return rb_io_readlines(arg->argc, &arg->sep, arg->io); +} + +/* + * call-seq: + * IO.readlines(name, sep_string=$/) => array + * + * Reads the entire file specified by <i>name</i> as individual + * lines, and returns those lines in an array. Lines are separated by + * <i>sep_string</i>. 
+ * + * a = IO.readlines("testfile") + * a[0] #=> "This is line one\n" + * + */ + +static VALUE +rb_io_s_readlines(argc, argv, io) + int argc; + VALUE *argv; + VALUE io; +{ + VALUE fname; + struct foreach_arg arg; + + rb_scan_args(argc, argv, "11", &fname, &arg.sep); + FilePathValue(fname); + arg.argc = argc - 1; + arg.io = rb_io_open(RSTRING(fname)->ptr, "r"); + if (NIL_P(arg.io)) return Qnil; + return rb_ensure(io_s_readlines, (VALUE)&arg, rb_io_close, arg.io); +} + +static VALUE +io_s_read(arg) + struct foreach_arg *arg; +{ + return io_read(arg->argc, &arg->sep, arg->io); +} + +/* + * call-seq: + * IO.read(name, [length [, offset]] ) => string + * + * Opens the file, optionally seeks to the given offset, then returns + * <i>length</i> bytes (defaulting to the rest of the file). + * <code>read</code> ensures the file is closed before returning. + * + * IO.read("testfile") #=> "This is line one\nThis is line two\nThis is line three\nAnd so on...\n" + * IO.read("testfile", 20) #=> "This is line one\nThi" + * IO.read("testfile", 20, 10) #=> "ne one\nThis is line " + */ + +static VALUE +rb_io_s_read(argc, argv, io) + int argc; + VALUE *argv; + VALUE io; +{ + VALUE fname, offset; + struct foreach_arg arg; + + rb_scan_args(argc, argv, "12", &fname, &arg.sep, &offset); + FilePathValue(fname); + arg.argc = argc ? 
1 : 0; + arg.io = rb_io_open(RSTRING(fname)->ptr, "r"); + if (NIL_P(arg.io)) return Qnil; + if (!NIL_P(offset)) { + rb_io_seek(arg.io, offset, SEEK_SET); + } + return rb_ensure(io_s_read, (VALUE)&arg, rb_io_close, arg.io); +} + +static VALUE +argf_tell() +{ + if (!next_argv()) { + rb_raise(rb_eArgError, "no stream to tell"); + } + ARGF_FORWARD(0, 0); + return rb_io_tell(current_file); +} + +static VALUE +argf_seek_m(argc, argv, self) + int argc; + VALUE *argv; + VALUE self; +{ + if (!next_argv()) { + rb_raise(rb_eArgError, "no stream to seek"); + } + ARGF_FORWARD(argc, argv); + return rb_io_seek_m(argc, argv, current_file); +} + +static VALUE +argf_set_pos(self, offset) + VALUE self, offset; +{ + if (!next_argv()) { + rb_raise(rb_eArgError, "no stream to set position"); + } + ARGF_FORWARD(1, &offset); + return rb_io_set_pos(current_file, offset); +} + +static VALUE +argf_rewind() +{ + if (!next_argv()) { + rb_raise(rb_eArgError, "no stream to rewind"); + } + ARGF_FORWARD(0, 0); + return rb_io_rewind(current_file); +} + +static VALUE +argf_fileno() +{ + if (!next_argv()) { + rb_raise(rb_eArgError, "no stream"); + } + ARGF_FORWARD(0, 0); + return rb_io_fileno(current_file); +} + +static VALUE +argf_to_io() +{ + next_argv(); + ARGF_FORWARD(0, 0); + return current_file; +} + +static VALUE +argf_eof() +{ + if (current_file) { + if (init_p == 0) return Qtrue; + ARGF_FORWARD(0, 0); + if (rb_io_eof(current_file)) { + return Qtrue; + } + } + return Qfalse; +} + +static VALUE +argf_read(argc, argv) + int argc; + VALUE *argv; +{ + VALUE tmp, str, length; + long len = 0; + + rb_scan_args(argc, argv, "02", &length, &str); + if (!NIL_P(length)) { + len = NUM2LONG(argv[0]); + } + if (!NIL_P(str)) { + StringValue(str); + rb_str_resize(str,0); + argv[1] = Qnil; + } + + retry: + if (!next_argv()) { + return str; + } + if (TYPE(current_file) != T_FILE) { + tmp = argf_forward(argc, argv); + } + else { + tmp = io_read(argc, argv, current_file); + } + if (NIL_P(str)) str = tmp; + else 
if (!NIL_P(tmp)) rb_str_append(str, tmp); + if (NIL_P(tmp) || NIL_P(length)) { + if (next_p != -1) { + argf_close(current_file); + next_p = 1; + goto retry; + } + } + else if (argc >= 1) { + if (RSTRING(str)->len < len) { + len -= RSTRING(str)->len; + argv[0] = INT2NUM(len); + goto retry; + } + } + return str; +} + +static VALUE +argf_readpartial_rescue(VALUE dummy) +{ + return Qnil; +} + +static VALUE +argf_readpartial(int argc, VALUE *argv) +{ + VALUE tmp, str, length; + + rb_scan_args(argc, argv, "11", &length, &str); + if (!NIL_P(str)) { + StringValue(str); + argv[1] = str; + } + + if (!next_argv()) { + rb_str_resize(str, 0); + rb_eof_error(); + } + if (TYPE(current_file) != T_FILE) { + tmp = rb_rescue2(argf_forward, (VALUE)argv, + argf_readpartial_rescue, (VALUE)Qnil, + rb_eEOFError, (VALUE)0); + } + else { + tmp = io_getpartial(argc, argv, current_file); + } + if (NIL_P(tmp)) { + if (next_p == -1) { + rb_eof_error(); + } + argf_close(current_file); + next_p = 1; + if (RARRAY(rb_argv)->len == 0) + rb_eof_error(); + if (NIL_P(str)) + str = rb_str_new(NULL, 0); + return str; + } + return tmp; +} + +static VALUE +argf_getc() +{ + VALUE byte; + + retry: + if (!next_argv()) return Qnil; + if (TYPE(current_file) != T_FILE) { + byte = rb_funcall3(current_file, rb_intern("getc"), 0, 0); + } + else { + byte = rb_io_getc(current_file); + } + if (NIL_P(byte) && next_p != -1) { + argf_close(current_file); + next_p = 1; + goto retry; + } + + return byte; +} + +static VALUE +argf_readchar() +{ + VALUE c; + + NEXT_ARGF_FORWARD(0, 0); + c = argf_getc(); + if (NIL_P(c)) { + rb_eof_error(); + } + return c; +} + +static VALUE +argf_each_line(argc, argv) + int argc; + VALUE *argv; +{ + VALUE str; + + if (!next_argv()) return Qnil; + if (TYPE(current_file) != T_FILE) { + for (;;) { + if (!next_argv()) return argf; + rb_iterate(rb_each, current_file, rb_yield, 0); + next_p = 1; + } + } + while (!NIL_P(str = argf_getline(argc, argv))) { + rb_yield(str); + } + return argf; +} + 
+static VALUE +argf_each_byte() +{ + VALUE byte; + + while (!NIL_P(byte = argf_getc())) { + rb_yield(byte); + } + return argf; +} + +static VALUE +argf_filename() +{ + next_argv(); + return filename; +} + +static VALUE +argf_file() +{ + next_argv(); + return current_file; +} + +static VALUE +argf_binmode() +{ + binmode = 1; + next_argv(); + ARGF_FORWARD(0, 0); + rb_io_binmode(current_file); + return argf; +} + +static VALUE +argf_skip() +{ + if (next_p != -1) { + argf_close(current_file); + next_p = 1; + } + return argf; +} + +static VALUE +argf_close_m() +{ + next_argv(); + argf_close(current_file); + if (next_p != -1) { + next_p = 1; + } + gets_lineno = 0; + return argf; +} + +static VALUE +argf_closed() +{ + next_argv(); + ARGF_FORWARD(0, 0); + return rb_io_closed(current_file); +} + +static VALUE +argf_to_s() +{ + return rb_str_new2("ARGF"); +} + +static VALUE +opt_i_get() +{ + if (!ruby_inplace_mode) return Qnil; + return rb_str_new2(ruby_inplace_mode); +} + +static void +opt_i_set(val) + VALUE val; +{ + if (!RTEST(val)) { + if (ruby_inplace_mode) free(ruby_inplace_mode); + ruby_inplace_mode = 0; + return; + } + StringValue(val); + if (ruby_inplace_mode) free(ruby_inplace_mode); + ruby_inplace_mode = 0; + ruby_inplace_mode = strdup(RSTRING(val)->ptr); +} + +/* + * Class <code>IO</code> is the basis for all input and output in Ruby. + * An I/O stream may be <em>duplexed</em> (that is, bidirectional), and + * so may use more than one native operating system stream. + * + * Many of the examples in this section use class <code>File</code>, + * the only standard subclass of <code>IO</code>. The two classes are + * closely associated. + * + * As used in this section, <em>portname</em> may take any of the + * following forms. + * + * * A plain string represents a filename suitable for the underlying + * operating system. + * + * * A string starting with ``<code>|</code>'' indicates a subprocess. 
+ * The remainder of the string following the ``<code>|</code>'' is + * invoked as a process with appropriate input/output channels + * connected to it. + * + * * A string equal to ``<code>|-</code>'' will create another Ruby + * instance as a subprocess. + * + * Ruby will convert pathnames between different operating system + * conventions if possible. For instance, on a Windows system the + * filename ``<code>/gumby/ruby/test.rb</code>'' will be opened as + * ``<code>\gumby\ruby\test.rb</code>''. When specifying a + * Windows-style filename in a Ruby string, remember to escape the + * backslashes: + * + * "c:\\gumby\\ruby\\test.rb" + * + * Our examples here will use the Unix-style forward slashes; + * <code>File::SEPARATOR</code> can be used to get the + * platform-specific separator character. + * + * I/O ports may be opened in any one of several different modes, which + * are shown in this section as <em>mode</em>. The mode may + * either be a Fixnum or a String. If numeric, it should be + * one of the operating system specific constants (O_RDONLY, + * O_WRONLY, O_RDWR, O_APPEND and so on). See man open(2) for + * more information. + * + * If the mode is given as a String, it must be one of the + * values listed in the following table. + * + * Mode | Meaning + * -----+-------------------------------------------------------- + * "r" | Read-only, starts at beginning of file (default mode). + * -----+-------------------------------------------------------- + * "r+" | Read-write, starts at beginning of file. + * -----+-------------------------------------------------------- + * "w" | Write-only, truncates existing file + * | to zero length or creates a new file for writing. + * -----+-------------------------------------------------------- + * "w+" | Read-write, truncates existing file to zero length + * | or creates a new file for reading and writing. 
+ * -----+-------------------------------------------------------- + * "a" | Write-only, starts at end of file if file exists, + * | otherwise creates a new file for writing. + * -----+-------------------------------------------------------- + * "a+" | Read-write, starts at end of file if file exists, + * | otherwise creates a new file for reading and + * | writing. + * -----+-------------------------------------------------------- + * "b" | (DOS/Windows only) Binary file mode (may appear with + * | any of the key letters listed above). + * + * + * The global constant ARGF (also accessible as $<) provides an + * IO-like stream which allows access to all files mentioned on the + * command line (or STDIN if no files are mentioned). ARGF provides + * the methods <code>#path</code> and <code>#filename</code> to access + * the name of the file currently being read. + */ + +void +Init_IO() +{ +#ifdef __CYGWIN__ +#include <sys/cygwin.h> + static struct __cygwin_perfile pf[] = + { + {"", O_RDONLY | O_BINARY}, + {"", O_WRONLY | O_BINARY}, + {"", O_RDWR | O_BINARY}, + {"", O_APPEND | O_BINARY}, + {NULL, 0} + }; + cygwin_internal(CW_PERFILE, pf); +#endif + + rb_eIOError = rb_define_class("IOError", rb_eStandardError); + rb_eEOFError = rb_define_class("EOFError", rb_eIOError); + + id_write = rb_intern("write"); + id_read = rb_intern("read"); + id_getc = rb_intern("getc"); + id_flush = rb_intern("flush"); + + rb_define_global_function("syscall", rb_f_syscall, -1); + + rb_define_global_function("open", rb_f_open, -1); + rb_define_global_function("printf", rb_f_printf, -1); + rb_define_global_function("print", rb_f_print, -1); + rb_define_global_function("putc", rb_f_putc, 1); + rb_define_global_function("puts", rb_f_puts, -1); + rb_define_global_function("gets", rb_f_gets, -1); + rb_define_global_function("readline", rb_f_readline, -1); + rb_define_global_function("getc", rb_f_getc, 0); + rb_define_global_function("select", rb_f_select, -1); + + 
rb_define_global_function("readlines", rb_f_readlines, -1); + + rb_define_global_function("`", rb_f_backquote, 1); + + rb_define_global_function("p", rb_f_p, -1); + rb_define_method(rb_mKernel, "display", rb_obj_display, -1); + + rb_cIO = rb_define_class("IO", rb_cObject); + rb_include_module(rb_cIO, rb_mEnumerable); + + rb_define_alloc_func(rb_cIO, io_alloc); + rb_define_singleton_method(rb_cIO, "new", rb_io_s_new, -1); + rb_define_singleton_method(rb_cIO, "open", rb_io_s_open, -1); + rb_define_singleton_method(rb_cIO, "sysopen", rb_io_s_sysopen, -1); + rb_define_singleton_method(rb_cIO, "for_fd", rb_io_s_for_fd, -1); + rb_define_singleton_method(rb_cIO, "popen", rb_io_s_popen, -1); + rb_define_singleton_method(rb_cIO, "foreach", rb_io_s_foreach, -1); + rb_define_singleton_method(rb_cIO, "readlines", rb_io_s_readlines, -1); + rb_define_singleton_method(rb_cIO, "read", rb_io_s_read, -1); + rb_define_singleton_method(rb_cIO, "select", rb_f_select, -1); + rb_define_singleton_method(rb_cIO, "pipe", rb_io_s_pipe, 0); + + rb_define_method(rb_cIO, "initialize", rb_io_initialize, -1); + + rb_output_fs = Qnil; + rb_define_hooked_variable("$,", &rb_output_fs, 0, rb_str_setter); + + rb_rs = rb_default_rs = rb_str_new2("\n"); + rb_output_rs = Qnil; + rb_global_variable(&rb_default_rs); + OBJ_FREEZE(rb_default_rs); /* avoid modifying RS_default */ + rb_define_hooked_variable("$/", &rb_rs, 0, rb_str_setter); + rb_define_hooked_variable("$-0", &rb_rs, 0, rb_str_setter); + rb_define_hooked_variable("$\\", &rb_output_rs, 0, rb_str_setter); + + rb_define_hooked_variable("$.", &lineno, 0, lineno_setter); + rb_define_virtual_variable("$_", rb_lastline_get, rb_lastline_set); + + rb_define_method(rb_cIO, "initialize_copy", rb_io_init_copy, 1); + rb_define_method(rb_cIO, "reopen", rb_io_reopen, -1); + + rb_define_method(rb_cIO, "print", rb_io_print, -1); + rb_define_method(rb_cIO, "putc", rb_io_putc, 1); + rb_define_method(rb_cIO, "puts", rb_io_puts, -1); + rb_define_method(rb_cIO, 
"printf", rb_io_printf, -1); + + rb_define_method(rb_cIO, "each", rb_io_each_line, -1); + rb_define_method(rb_cIO, "each_line", rb_io_each_line, -1); + rb_define_method(rb_cIO, "each_byte", rb_io_each_byte, 0); + + rb_define_method(rb_cIO, "syswrite", rb_io_syswrite, 1); + rb_define_method(rb_cIO, "sysread", rb_io_sysread, -1); + + rb_define_method(rb_cIO, "fileno", rb_io_fileno, 0); + rb_define_alias(rb_cIO, "to_i", "fileno"); + rb_define_method(rb_cIO, "to_io", rb_io_to_io, 0); + + rb_define_method(rb_cIO, "fsync", rb_io_fsync, 0); + rb_define_method(rb_cIO, "sync", rb_io_sync, 0); + rb_define_method(rb_cIO, "sync=", rb_io_set_sync, 1); + + rb_define_method(rb_cIO, "lineno", rb_io_lineno, 0); + rb_define_method(rb_cIO, "lineno=", rb_io_set_lineno, 1); + + rb_define_method(rb_cIO, "readlines", rb_io_readlines, -1); + + rb_define_method(rb_cIO, "readpartial", io_readpartial, -1); + rb_define_method(rb_cIO, "read", io_read, -1); + rb_define_method(rb_cIO, "write", io_write, 1); + rb_define_method(rb_cIO, "gets", rb_io_gets_m, -1); + rb_define_method(rb_cIO, "readline", rb_io_readline, -1); + rb_define_method(rb_cIO, "getc", rb_io_getc, 0); + rb_define_method(rb_cIO, "readchar", rb_io_readchar, 0); + rb_define_method(rb_cIO, "ungetc",rb_io_ungetc, 1); + rb_define_method(rb_cIO, "<<", rb_io_addstr, 1); + rb_define_method(rb_cIO, "flush", rb_io_flush, 0); + rb_define_method(rb_cIO, "tell", rb_io_tell, 0); + rb_define_method(rb_cIO, "seek", rb_io_seek_m, -1); + rb_define_const(rb_cIO, "SEEK_SET", INT2FIX(SEEK_SET)); + rb_define_const(rb_cIO, "SEEK_CUR", INT2FIX(SEEK_CUR)); + rb_define_const(rb_cIO, "SEEK_END", INT2FIX(SEEK_END)); + rb_define_method(rb_cIO, "rewind", rb_io_rewind, 0); + rb_define_method(rb_cIO, "pos", rb_io_tell, 0); + rb_define_method(rb_cIO, "pos=", rb_io_set_pos, 1); + rb_define_method(rb_cIO, "eof", rb_io_eof, 0); + rb_define_method(rb_cIO, "eof?", rb_io_eof, 0); + + rb_define_method(rb_cIO, "close", rb_io_close_m, 0); + rb_define_method(rb_cIO, 
"closed?", rb_io_closed, 0); + rb_define_method(rb_cIO, "close_read", rb_io_close_read, 0); + rb_define_method(rb_cIO, "close_write", rb_io_close_write, 0); + + rb_define_method(rb_cIO, "isatty", rb_io_isatty, 0); + rb_define_method(rb_cIO, "tty?", rb_io_isatty, 0); + rb_define_method(rb_cIO, "binmode", rb_io_binmode, 0); + rb_define_method(rb_cIO, "sysseek", rb_io_sysseek, -1); + + rb_define_method(rb_cIO, "ioctl", rb_io_ioctl, -1); + rb_define_method(rb_cIO, "fcntl", rb_io_fcntl, -1); + rb_define_method(rb_cIO, "pid", rb_io_pid, 0); + rb_define_method(rb_cIO, "inspect", rb_io_inspect, 0); + + rb_stdin = prep_stdio(stdin, FMODE_READABLE, rb_cIO, "<STDIN>"); + rb_define_variable("$stdin", &rb_stdin); + rb_stdout = prep_stdio(stdout, FMODE_WRITABLE, rb_cIO, "<STDOUT>"); + rb_define_hooked_variable("$stdout", &rb_stdout, 0, stdout_setter); + rb_stderr = prep_stdio(stderr, FMODE_WRITABLE|FMODE_SYNC, rb_cIO, "<STDERR>"); + rb_define_hooked_variable("$stderr", &rb_stderr, 0, stdout_setter); + rb_define_hooked_variable("$>", &rb_stdout, 0, stdout_setter); + orig_stdout = rb_stdout; + rb_deferr = orig_stderr = rb_stderr; + + /* variables to be removed in 1.8.1 */ + rb_define_hooked_variable("$defout", &rb_stdout, 0, defout_setter); + rb_define_hooked_variable("$deferr", &rb_stderr, 0, deferr_setter); + + /* constants to hold original stdin/stdout/stderr */ + rb_define_global_const("STDIN", rb_stdin); + rb_define_global_const("STDOUT", rb_stdout); + rb_define_global_const("STDERR", rb_stderr); + + argf = rb_obj_alloc(rb_cObject); + rb_extend_object(argf, rb_mEnumerable); + + rb_define_readonly_variable("$<", &argf); + rb_define_global_const("ARGF", argf); + + rb_define_singleton_method(argf, "to_s", argf_to_s, 0); + + rb_define_singleton_method(argf, "fileno", argf_fileno, 0); + rb_define_singleton_method(argf, "to_i", argf_fileno, 0); + rb_define_singleton_method(argf, "to_io", argf_to_io, 0); + rb_define_singleton_method(argf, "each", argf_each_line, -1); + 
rb_define_singleton_method(argf, "each_line", argf_each_line, -1); + rb_define_singleton_method(argf, "each_byte", argf_each_byte, 0); + + rb_define_singleton_method(argf, "read", argf_read, -1); + rb_define_singleton_method(argf, "readpartial", argf_readpartial, -1); + rb_define_singleton_method(argf, "readlines", rb_f_readlines, -1); + rb_define_singleton_method(argf, "to_a", rb_f_readlines, -1); + rb_define_singleton_method(argf, "gets", rb_f_gets, -1); + rb_define_singleton_method(argf, "readline", rb_f_readline, -1); + rb_define_singleton_method(argf, "getc", argf_getc, 0); + rb_define_singleton_method(argf, "readchar", argf_readchar, 0); + rb_define_singleton_method(argf, "tell", argf_tell, 0); + rb_define_singleton_method(argf, "seek", argf_seek_m, -1); + rb_define_singleton_method(argf, "rewind", argf_rewind, 0); + rb_define_singleton_method(argf, "pos", argf_tell, 0); + rb_define_singleton_method(argf, "pos=", argf_set_pos, 1); + rb_define_singleton_method(argf, "eof", argf_eof, 0); + rb_define_singleton_method(argf, "eof?", argf_eof, 0); + rb_define_singleton_method(argf, "binmode", argf_binmode, 0); + + rb_define_singleton_method(argf, "filename", argf_filename, 0); + rb_define_singleton_method(argf, "path", argf_filename, 0); + rb_define_singleton_method(argf, "file", argf_file, 0); + rb_define_singleton_method(argf, "skip", argf_skip, 0); + rb_define_singleton_method(argf, "close", argf_close_m, 0); + rb_define_singleton_method(argf, "closed?", argf_closed, 0); + + rb_define_singleton_method(argf, "lineno", argf_lineno, 0); + rb_define_singleton_method(argf, "lineno=", argf_set_lineno, 1); + + rb_global_variable(¤t_file); + filename = rb_str_new2("-"); + rb_define_readonly_variable("$FILENAME", &filename); + + rb_define_virtual_variable("$-i", opt_i_get, opt_i_set); + +#if defined (_WIN32) || defined(DJGPP) || defined(__CYGWIN__) || defined(__human68k__) + atexit(pipe_atexit); +#endif + + Init_File(); + + rb_define_method(rb_cFile, "initialize", 
rb_file_initialize, -1); + + rb_file_const("RDONLY", INT2FIX(O_RDONLY)); + rb_file_const("WRONLY", INT2FIX(O_WRONLY)); + rb_file_const("RDWR", INT2FIX(O_RDWR)); + rb_file_const("APPEND", INT2FIX(O_APPEND)); + rb_file_const("CREAT", INT2FIX(O_CREAT)); + rb_file_const("EXCL", INT2FIX(O_EXCL)); +#if defined(O_NDELAY) || defined(O_NONBLOCK) +# ifdef O_NONBLOCK + rb_file_const("NONBLOCK", INT2FIX(O_NONBLOCK)); +# else + rb_file_const("NONBLOCK", INT2FIX(O_NDELAY)); +# endif +#endif + rb_file_const("TRUNC", INT2FIX(O_TRUNC)); +#ifdef O_NOCTTY + rb_file_const("NOCTTY", INT2FIX(O_NOCTTY)); +#endif +#ifdef O_BINARY + rb_file_const("BINARY", INT2FIX(O_BINARY)); +#endif +#ifdef O_SYNC + rb_file_const("SYNC", INT2FIX(O_SYNC)); +#endif +} +/* C code produced by gperf version 2.7.2 */ +/* Command-line: gperf -p -j1 -i 1 -g -o -t -N rb_reserved_word -k'1,3,$' keywords */ +struct kwtable {char *name; int id[2]; enum lex_state_e state;}; +struct kwtable *rb_reserved_word _((const char *, unsigned int)); +#ifndef RIPPER +; + +#define TOTAL_KEYWORDS 40 +#define MIN_WORD_LENGTH 2 +#define MAX_WORD_LENGTH 8 +#define MIN_HASH_VALUE 6 +#define MAX_HASH_VALUE 55 +/* maximum key range = 50, duplicates = 0 */ + +#ifdef __GNUC__ +__inline +#else +#ifdef __cplusplus +inline +#endif +#endif +static unsigned int +hash (str, len) + register const char *str; + register unsigned int len; +{ + static unsigned char asso_values[] = + { + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 11, 56, 56, 36, 56, 1, 37, + 31, 1, 56, 56, 56, 56, 29, 56, 1, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 1, 56, 32, 1, 2, + 1, 1, 4, 23, 56, 17, 56, 20, 9, 2, + 9, 26, 14, 56, 5, 1, 1, 16, 56, 21, + 20, 9, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 
56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56 + }; + register int hval = len; + + switch (hval) + { + default: + case 3: + hval += asso_values[(unsigned char)str[2]]; + case 2: + case 1: + hval += asso_values[(unsigned char)str[0]]; + break; + } + return hval + asso_values[(unsigned char)str[len - 1]]; +} + +#ifdef __GNUC__ +__inline +#endif +struct kwtable * +rb_reserved_word (str, len) + register const char *str; + register unsigned int len; +{ + static struct kwtable wordlist[] = + { + {""}, {""}, {""}, {""}, {""}, {""}, + {"end", {kEND, kEND}, EXPR_END}, + {"else", {kELSE, kELSE}, EXPR_BEG}, + {"case", {kCASE, kCASE}, EXPR_VALUE}, + {"ensure", {kENSURE, kENSURE}, EXPR_BEG}, + {"module", {kMODULE, kMODULE}, EXPR_VALUE}, + {"elsif", {kELSIF, kELSIF}, EXPR_VALUE}, + {"def", {kDEF, kDEF}, EXPR_FNAME}, + {"rescue", {kRESCUE, kRESCUE_MOD}, EXPR_MID}, + {"not", {kNOT, kNOT}, EXPR_VALUE}, + {"then", {kTHEN, kTHEN}, EXPR_BEG}, + {"yield", {kYIELD, kYIELD}, EXPR_ARG}, + {"for", {kFOR, kFOR}, EXPR_VALUE}, + {"self", {kSELF, kSELF}, EXPR_END}, + {"false", {kFALSE, kFALSE}, EXPR_END}, + {"retry", {kRETRY, kRETRY}, EXPR_END}, + {"return", {kRETURN, kRETURN}, EXPR_MID}, + {"true", {kTRUE, kTRUE}, EXPR_END}, + {"if", {kIF, kIF_MOD}, EXPR_VALUE}, + {"defined?", {kDEFINED, kDEFINED}, EXPR_ARG}, + {"super", {kSUPER, kSUPER}, EXPR_ARG}, + {"undef", {kUNDEF, kUNDEF}, EXPR_FNAME}, + {"break", {kBREAK, kBREAK}, EXPR_MID}, + {"in", {kIN, kIN}, EXPR_VALUE}, + {"do", {kDO, kDO}, EXPR_BEG}, + {"nil", {kNIL, kNIL}, EXPR_END}, + {"until", {kUNTIL, 
kUNTIL_MOD}, EXPR_VALUE}, + {"unless", {kUNLESS, kUNLESS_MOD}, EXPR_VALUE}, + {"or", {kOR, kOR}, EXPR_VALUE}, + {"next", {kNEXT, kNEXT}, EXPR_MID}, + {"when", {kWHEN, kWHEN}, EXPR_VALUE}, + {"redo", {kREDO, kREDO}, EXPR_END}, + {"and", {kAND, kAND}, EXPR_VALUE}, + {"begin", {kBEGIN, kBEGIN}, EXPR_BEG}, + {"__LINE__", {k__LINE__, k__LINE__}, EXPR_END}, + {"class", {kCLASS, kCLASS}, EXPR_CLASS}, + {"__FILE__", {k__FILE__, k__FILE__}, EXPR_END}, + {"END", {klEND, klEND}, EXPR_END}, + {"BEGIN", {klBEGIN, klBEGIN}, EXPR_END}, + {"while", {kWHILE, kWHILE_MOD}, EXPR_VALUE}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, + {"alias", {kALIAS, kALIAS}, EXPR_FNAME} + }; + + if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) + { + register int key = hash (str, len); + + if (key <= MAX_HASH_VALUE && key >= 0) + { + register const char *s = wordlist[key].name; + + if (*str == *s && !strcmp (str + 1, s + 1)) + return &wordlist[key]; + } + } + return 0; +} +#endif +/********************************************************************** + + main.c - + + $Author: michal $ + $Date: 2004/06/23 12:59:01 $ + created at: Fri Aug 19 13:19:58 JST 1994 + + Copyright (C) 1993-2003 Yukihiro Matsumoto + +**********************************************************************/ + +#include "ruby.h" + +#if defined(__MACOS__) && defined(__MWERKS__) +#include <console.h> +#endif + +/* to link startup code with ObjC support */ +#if (defined(__APPLE__) || defined(__NeXT__)) && defined(__MACH__) +static void objcdummyfunction( void ) { objc_msgSend(); } +#endif + +int +main(argc, argv, envp) + int argc; + char **argv, **envp; +{ +#ifdef _WIN32 + NtInitialize(&argc, &argv); +#endif +#if defined(__MACOS__) && defined(__MWERKS__) + argc = ccommand(&argv); +#endif + + ruby_init(); + ruby_options(argc, argv); + ruby_run(); + return 0; +} +/********************************************************************** + + marshal.c - + + $Author: matz $ + $Date: 2005/03/04 06:47:42 $ + created 
at: Thu Apr 27 16:30:01 JST 1995 + + Copyright (C) 1993-2003 Yukihiro Matsumoto + +**********************************************************************/ + +#include "ruby.h" +#include "rubyio.h" +#include "st.h" +#include "util.h" + +#include <math.h> +#ifdef HAVE_FLOAT_H +#include <float.h> +#endif +#ifdef HAVE_IEEEFP_H +#include <ieeefp.h> +#endif + +#define BITSPERSHORT (2*CHAR_BIT) +#define SHORTMASK ((1<<BITSPERSHORT)-1) +#define SHORTDN(x) RSHIFT(x,BITSPERSHORT) + +#if SIZEOF_SHORT == SIZEOF_BDIGITS +#define SHORTLEN(x) (x) +#else +static int +shortlen(len, ds) + long len; + BDIGIT *ds; +{ + BDIGIT num; + int offset = 0; + + num = ds[len-1]; + while (num) { + num = SHORTDN(num); + offset++; + } + return (len - 1)*sizeof(BDIGIT)/2 + offset; +} +#define SHORTLEN(x) shortlen((x),d) +#endif + +#define MARSHAL_MAJOR 4 +#define MARSHAL_MINOR 8 + +#define TYPE_NIL '0' +#define TYPE_TRUE 'T' +#define TYPE_FALSE 'F' +#define TYPE_FIXNUM 'i' + +#define TYPE_EXTENDED 'e' +#define TYPE_UCLASS 'C' +#define TYPE_OBJECT 'o' +#define TYPE_DATA 'd' +#define TYPE_USERDEF 'u' +#define TYPE_USRMARSHAL 'U' +#define TYPE_FLOAT 'f' +#define TYPE_BIGNUM 'l' +#define TYPE_STRING '"' +#define TYPE_REGEXP '/' +#define TYPE_ARRAY '[' +#define TYPE_HASH '{' +#define TYPE_HASH_DEF '}' +#define TYPE_STRUCT 'S' +#define TYPE_MODULE_OLD 'M' +#define TYPE_CLASS 'c' +#define TYPE_MODULE 'm' + +#define TYPE_SYMBOL ':' +#define TYPE_SYMLINK ';' + +#define TYPE_IVAR 'I' +#define TYPE_LINK '@' + +static ID s_dump, s_load, s_mdump, s_mload; +static ID s_dump_data, s_load_data, s_alloc; +static ID s_getc, s_read, s_write, s_binmode; + +struct dump_arg { + VALUE obj; + VALUE str, dest; + st_table *symbols; + st_table *data; + int taint; +}; + +struct dump_call_arg { + VALUE obj; + struct dump_arg *arg; + int limit; +}; + +static VALUE +class2path(klass) + VALUE klass; +{ + VALUE path = rb_class_path(klass); + char *n = RSTRING(path)->ptr; + + if (n[0] == '#') { + rb_raise(rb_eTypeError, "can't dump 
anonymous %s %s", + (TYPE(klass) == T_CLASS ? "class" : "module"), + n); + } + if (rb_path2class(n) != rb_class_real(klass)) { + rb_raise(rb_eTypeError, "%s can't be referred", n); + } + return path; +} + +static void w_long _((long, struct dump_arg*)); + +static void +w_nbyte(s, n, arg) + char *s; + int n; + struct dump_arg *arg; +{ + VALUE buf = arg->str; + rb_str_buf_cat(buf, s, n); + if (arg->dest && RSTRING(buf)->len >= BUFSIZ) { + if (arg->taint) OBJ_TAINT(buf); + rb_io_write(arg->dest, buf); + rb_str_resize(buf, 0); + } +} + +static void +w_byte(c, arg) + char c; + struct dump_arg *arg; +{ + w_nbyte(&c, 1, arg); +} + +static void +w_bytes(s, n, arg) + char *s; + int n; + struct dump_arg *arg; +{ + w_long(n, arg); + w_nbyte(s, n, arg); +} + +static void +w_short(x, arg) + int x; + struct dump_arg *arg; +{ + w_byte((x >> 0) & 0xff, arg); + w_byte((x >> 8) & 0xff, arg); +} + +static void +w_long(x, arg) + long x; + struct dump_arg *arg; +{ + char buf[sizeof(long)+1]; + int i, len = 0; + +#if SIZEOF_LONG > 4 + if (!(RSHIFT(x, 31) == 0 || RSHIFT(x, 31) == -1)) { + /* big long does not fit in 4 bytes */ + rb_raise(rb_eTypeError, "long too big to dump"); + } +#endif + + if (x == 0) { + w_byte(0, arg); + return; + } + if (0 < x && x < 123) { + w_byte(x + 5, arg); + return; + } + if (-124 < x && x < 0) { + w_byte((x - 5)&0xff, arg); + return; + } + for (i=1;i<sizeof(long)+1;i++) { + buf[i] = x & 0xff; + x = RSHIFT(x,8); + if (x == 0) { + buf[0] = i; + break; + } + if (x == -1) { + buf[0] = -i; + break; + } + } + len = i; + for (i=0;i<=len;i++) { + w_byte(buf[i], arg); + } +} + +#ifdef DBL_MANT_DIG +#define DECIMAL_MANT (53-16) /* from IEEE754 double precision */ + +#if DBL_MANT_DIG > 32 +#define MANT_BITS 32 +#elif DBL_MANT_DIG > 24 +#define MANT_BITS 24 +#elif DBL_MANT_DIG > 16 +#define MANT_BITS 16 +#else +#define MANT_BITS 8 +#endif + +static int +save_mantissa(d, buf) + double d; + char *buf; +{ + int e, i = 0; + unsigned long m; + double n; + + d = 
modf(ldexp(frexp(fabs(d), &e), DECIMAL_MANT), &d); + if (d > 0) { + buf[i++] = 0; + do { + d = modf(ldexp(d, MANT_BITS), &n); + m = (unsigned long)n; +#if MANT_BITS > 24 + buf[i++] = m >> 24; +#endif +#if MANT_BITS > 16 + buf[i++] = m >> 16; +#endif +#if MANT_BITS > 8 + buf[i++] = m >> 8; +#endif + buf[i++] = m; + } while (d > 0); + while (!buf[i - 1]) --i; + } + return i; +} + +static double +load_mantissa(d, buf, len) + double d; + const char *buf; + int len; +{ + if (--len > 0 && !*buf++) { /* binary mantissa mark */ + int e, s = d < 0, dig = 0; + unsigned long m; + + modf(ldexp(frexp(fabs(d), &e), DECIMAL_MANT), &d); + do { + m = 0; + switch (len) { + default: m = *buf++ & 0xff; +#if MANT_BITS > 24 + case 3: m = (m << 8) | (*buf++ & 0xff); +#endif +#if MANT_BITS > 16 + case 2: m = (m << 8) | (*buf++ & 0xff); +#endif +#if MANT_BITS > 8 + case 1: m = (m << 8) | (*buf++ & 0xff); +#endif + } + dig -= len < MANT_BITS / 8 ? 8 * (unsigned)len : MANT_BITS; + d += ldexp((double)m, dig); + } while ((len -= MANT_BITS / 8) > 0); + d = ldexp(d, e - DECIMAL_MANT); + if (s) d = -d; + } + return d; +} +#else +#define load_mantissa(d, buf, len) (d) +#define save_mantissa(d, buf) 0 +#endif + +#ifdef DBL_DIG +#define FLOAT_DIG (DBL_DIG+2) +#else +#define FLOAT_DIG 17 +#endif + +static void +w_float(d, arg) + double d; + struct dump_arg *arg; +{ + char buf[100]; + + if (isinf(d)) { + if (d < 0) strcpy(buf, "-inf"); + else strcpy(buf, "inf"); + } + else if (isnan(d)) { + strcpy(buf, "nan"); + } + else if (d == 0.0) { + if (1.0/d < 0) strcpy(buf, "-0"); + else strcpy(buf, "0"); + } + else { + int len; + + /* xxx: should not use system's sprintf(3) */ + sprintf(buf, "%.*g", FLOAT_DIG, d); + len = strlen(buf); + w_bytes(buf, len + save_mantissa(d, buf + len), arg); + return; + } + w_bytes(buf, strlen(buf), arg); +} + +static void +w_symbol(id, arg) + ID id; + struct dump_arg *arg; +{ + char *sym = rb_id2name(id); + st_data_t num; + + if (st_lookup(arg->symbols, id, &num)) { + 
w_byte(TYPE_SYMLINK, arg); + w_long((long)num, arg); + } + else { + w_byte(TYPE_SYMBOL, arg); + w_bytes(sym, strlen(sym), arg); + st_add_direct(arg->symbols, id, arg->symbols->num_entries); + } +} + +static void +w_unique(s, arg) + char *s; + struct dump_arg *arg; +{ + if (s[0] == '#') { + rb_raise(rb_eTypeError, "can't dump anonymous class %s", s); + } + w_symbol(rb_intern(s), arg); +} + +static void w_object _((VALUE,struct dump_arg*,int)); + +static int +hash_each(key, value, arg) + VALUE key, value; + struct dump_call_arg *arg; +{ + w_object(key, arg->arg, arg->limit); + w_object(value, arg->arg, arg->limit); + return ST_CONTINUE; +} + +static void +w_extended(klass, arg, check) + VALUE klass; + struct dump_arg *arg; + int check; +{ + char *path; + + if (FL_TEST(klass, FL_SINGLETON)) { + if (check && RCLASS(klass)->m_tbl->num_entries || + (RCLASS(klass)->iv_tbl && RCLASS(klass)->iv_tbl->num_entries > 1)) { + rb_raise(rb_eTypeError, "singleton can't be dumped"); + } + klass = RCLASS(klass)->super; + } + while (BUILTIN_TYPE(klass) == T_ICLASS) { + path = rb_class2name(RBASIC(klass)->klass); + w_byte(TYPE_EXTENDED, arg); + w_unique(path, arg); + klass = RCLASS(klass)->super; + } +} + +static void +w_class(type, obj, arg, check) + int type; + VALUE obj; + struct dump_arg *arg; + int check; +{ + char *path; + + VALUE klass = CLASS_OF(obj); + w_extended(klass, arg, check); + w_byte(type, arg); + path = RSTRING(class2path(rb_class_real(klass)))->ptr; + w_unique(path, arg); +} + +static void +w_uclass(obj, base_klass, arg) + VALUE obj, base_klass; + struct dump_arg *arg; +{ + VALUE klass = CLASS_OF(obj); + + w_extended(klass, arg, Qtrue); + klass = rb_class_real(klass); + if (klass != base_klass) { + w_byte(TYPE_UCLASS, arg); + w_unique(RSTRING(class2path(klass))->ptr, arg); + } +} + +static int +w_obj_each(id, value, arg) + ID id; + VALUE value; + struct dump_call_arg *arg; +{ + w_symbol(id, arg->arg); + w_object(value, arg->arg, arg->limit); + return ST_CONTINUE; +} 

/*
 * Writes an instance-variable table to the dump: the entry count first,
 * then every entry through w_obj_each.  A null table is written as a
 * count of zero.
 */
static void
w_ivar(tbl, arg)
    st_table *tbl;
    struct dump_call_arg *arg;
{
    if (tbl) {
        w_long(tbl->num_entries, arg->arg);
        st_foreach_safe(tbl, w_obj_each, (st_data_t)arg);
    }
    else {
        w_long(0, arg->arg);
    }
}

/*
 * Serializes obj into the dump described by arg.
 *
 * limit is the remaining nesting depth: ArgumentError is raised when it
 * is exactly 0 on entry, and it is decremented before any nested object
 * is written.
 */
static void
w_object(obj, arg, limit)
    VALUE obj;
    struct dump_arg *arg;
    int limit;
{
    struct dump_call_arg c_arg;
    st_table *ivtbl = 0;
    st_data_t num;

    if (limit == 0) {
        rb_raise(rb_eArgError, "exceed depth limit");
    }

    limit--;
    /* c_arg bundles arg and the decremented limit for the table iterators */
    c_arg.limit = limit;
    c_arg.arg = arg;

    /* already written once: emit a link to its recorded index instead */
    if (st_lookup(arg->data, obj, &num)) {
        w_byte(TYPE_LINK, arg);
        w_long((long)num, arg);
        return;
    }

    /* a generic ivar table means the object is prefixed with TYPE_IVAR;
     * the table itself is written after the object body (see the end) */
    if (ivtbl = rb_generic_ivar_table(obj)) {
        w_byte(TYPE_IVAR, arg);
    }
    if (obj == Qnil) {
        w_byte(TYPE_NIL, arg);
    }
    else if (obj == Qtrue) {
        w_byte(TYPE_TRUE, arg);
    }
    else if (obj == Qfalse) {
        w_byte(TYPE_FALSE, arg);
    }
    else if (FIXNUM_P(obj)) {
#if SIZEOF_LONG <= 4
        w_byte(TYPE_FIXNUM, arg);
        w_long(FIX2INT(obj), arg);
#else
        /* wide longs: only values passing the shift-by-31 test are written
         * as TYPE_FIXNUM; anything else is converted to a bignum first */
        if (RSHIFT((long)obj, 31) == 0 || RSHIFT((long)obj, 31) == -1) {
            w_byte(TYPE_FIXNUM, arg);
            w_long(FIX2LONG(obj), arg);
        }
        else {
            w_object(rb_int2big(FIX2LONG(obj)), arg, limit);
        }
#endif
    }
    else if (SYMBOL_P(obj)) {
        w_symbol(SYM2ID(obj), arg);
    }
    else {
        if (OBJ_TAINTED(obj)) arg->taint = Qtrue;

        /* record obj under the next free index so later references to it
         * take the TYPE_LINK path above */
        st_add_direct(arg->data, obj, arg->data->num_entries);
        if (rb_respond_to(obj, s_mdump)) {
            VALUE v;

            /* object supplies its own representation via s_mdump */
            v = rb_funcall(obj, s_mdump, 0, 0);
            w_class(TYPE_USRMARSHAL, obj, arg, Qfalse);
            w_object(v, arg, limit);
            /* TYPE_IVAR was already emitted, so close it with a zero count */
            if (ivtbl) w_ivar(0, &c_arg);
            return;
        }
        if (rb_respond_to(obj, s_dump)) {
            VALUE v;

            /* s_dump receives the remaining depth and must return a String */
            v = rb_funcall(obj, s_dump, 1, INT2NUM(limit));
            if (TYPE(v) != T_STRING) {
                rb_raise(rb_eTypeError, "_dump() must return string");
            }
            /* obj had no generic ivars: fall back to those of the returned string */
            if (!ivtbl && (ivtbl = rb_generic_ivar_table(v))) {
                w_byte(TYPE_IVAR, arg);
            }
            w_class(TYPE_USERDEF, obj, arg, Qfalse);
            w_bytes(RSTRING(v)->ptr, RSTRING(v)->len, arg);
            if (ivtbl) {
                w_ivar(ivtbl, &c_arg);
            }
            return;
        }

        switch (BUILTIN_TYPE(obj)) {
          case T_CLASS:
            if (FL_TEST(obj, FL_SINGLETON)) {
                rb_raise(rb_eTypeError, "singleton class can't be dumped");
            }
            w_byte(TYPE_CLASS, arg);
            {
                /* classes and modules are written as their path string */
                VALUE path = class2path(obj);
                w_bytes(RSTRING(path)->ptr, RSTRING(path)->len, arg);
            }
            break;

          case T_MODULE:
            w_byte(TYPE_MODULE, arg);
            {
                VALUE path = class2path(obj);
                w_bytes(RSTRING(path)->ptr, RSTRING(path)->len, arg);
            }
            break;

          case T_FLOAT:
            w_byte(TYPE_FLOAT, arg);
            w_float(RFLOAT(obj)->value, arg);
            break;

          case T_BIGNUM:
            w_byte(TYPE_BIGNUM, arg);
            {
                /* layout: sign character, length via SHORTLEN, then the
                 * digits emitted in short-sized pieces */
                char sign = RBIGNUM(obj)->sign ? '+' : '-';
                long len = RBIGNUM(obj)->len;
                BDIGIT *d = RBIGNUM(obj)->digits;

                w_byte(sign, arg);
                w_long(SHORTLEN(len), arg); /* w_short? */
                while (len--) {
#if SIZEOF_BDIGITS > SIZEOF_SHORT
                    BDIGIT num = *d;
                    int i;

                    for (i=0; i<SIZEOF_BDIGITS; i+=SIZEOF_SHORT) {
                        w_short(num & SHORTMASK, arg);
                        num = SHORTDN(num);
                        /* last digit: stop once the remaining pieces are zero */
                        if (len == 0 && num == 0) break;
                    }
#else
                    w_short(*d, arg);
#endif
                    d++;
                }
            }
            break;

          case T_STRING:
            w_uclass(obj, rb_cString, arg);
            w_byte(TYPE_STRING, arg);
            w_bytes(RSTRING(obj)->ptr, RSTRING(obj)->len, arg);
            break;

          case T_REGEXP:
            w_uclass(obj, rb_cRegexp, arg);
            w_byte(TYPE_REGEXP, arg);
            w_bytes(RREGEXP(obj)->str, RREGEXP(obj)->len, arg);
            w_byte(rb_reg_options(obj), arg);
            break;

          case T_ARRAY:
            w_uclass(obj, rb_cArray, arg);
            w_byte(TYPE_ARRAY, arg);
            {
                long len = RARRAY(obj)->len;
                VALUE *ptr = RARRAY(obj)->ptr;

                w_long(len, arg);
                while (len--) {
                    w_object(*ptr, arg, limit);
                    ptr++;
                }
            }
            break;

          case T_HASH:
            w_uclass(obj, rb_cHash, arg);
            if (NIL_P(RHASH(obj)->ifnone)) {
                w_byte(TYPE_HASH, arg);
            }
            else if (FL_TEST(obj, FL_USER2)) {
                /* FL_USER2 means HASH_PROC_DEFAULT (see hash.c) */
                rb_raise(rb_eTypeError, "can't dump hash with default proc");
            }
            else {
                w_byte(TYPE_HASH_DEF, arg);
            }
            w_long(RHASH(obj)->tbl->num_entries, arg);
            rb_hash_foreach(obj, hash_each, (st_data_t)&c_arg);
            /* a non-nil default value follows the entries (TYPE_HASH_DEF) */
            if (!NIL_P(RHASH(obj)->ifnone)) {
                w_object(RHASH(obj)->ifnone, arg, limit);
            }
            break;

          case T_STRUCT:
            w_class(TYPE_STRUCT, obj, arg, Qtrue);
            {
                long len = RSTRUCT(obj)->len;
                VALUE mem;
                long i;

                w_long(len, arg);
                mem = rb_struct_members(obj);
                /* each slot is written as member symbol followed by value */
                for (i=0; i<len; i++) {
                    w_symbol(SYM2ID(RARRAY(mem)->ptr[i]), arg);
                    w_object(RSTRUCT(obj)->ptr[i], arg, limit);
                }
            }
            break;

          case T_OBJECT:
            w_class(TYPE_OBJECT, obj, arg, Qtrue);
            w_ivar(ROBJECT(obj)->iv_tbl, &c_arg);
            break;

          case T_DATA:
            {
                VALUE v;

                w_class(TYPE_DATA, obj, arg, Qtrue);
                if (!rb_respond_to(obj, s_dump_data)) {
                    rb_raise(rb_eTypeError,
                             "no marshal_dump is defined for class %s",
                             rb_obj_classname(obj));
                }
                v = rb_funcall(obj, s_dump_data, 0);
                w_object(v, arg, limit);
            }
            break;

          default:
            rb_raise(rb_eTypeError, "can't dump %s",
                     rb_obj_classname(obj));
            break;
        }
    }
    /* trailing ivar table that pairs with the TYPE_IVAR prefix above */
    if (ivtbl) {
        w_ivar(ivtbl, &c_arg);
    }
}

/*
 * Body passed to rb_ensure by the dump entry point: writes the root
 * object and, when a destination is set, hands the buffered string to it
 * with rb_io_write and empties the buffer.
 */
static VALUE
dump(arg)
    struct dump_call_arg *arg;
{
    w_object(arg->obj, arg->arg, arg->limit);
    if (arg->arg->dest) {
        rb_io_write(arg->arg->dest, arg->arg->str);
        rb_str_resize(arg->arg->str, 0);
    }
    return 0;
}

/*
 * Ensure handler paired with dump(): frees the symbol and object tables
 * and taints the output string if a tainted object was seen.
 */
static VALUE
dump_ensure(arg)
    struct dump_arg *arg;
{
    st_free_table(arg->symbols);
    st_free_table(arg->data);
    if (arg->taint) {
        OBJ_TAINT(arg->str);
    }
    return 0;
}

/*
 * call-seq:
 *      dump( obj [, anIO] , limit=-1 ) => anIO
 *
 * Serializes obj and all descendent objects. If anIO is
 * specified, the serialized data will be written to it, otherwise the
 * data will be returned as a String. If limit is specified, the
 * traversal of subobjects will be limited to that depth. If limit is
 * negative, no checking of depth will be performed.
+ * + * class Klass + * def initialize(str) + * @str = str + * end + * def sayHello + * @str + * end + * end + * + * (produces no output) + * + * o = Klass.new("hello\n") + * data = Marshal.dump(o) + * obj = Marshal.load(data) + * obj.sayHello #=> "hello\n" + */ +static VALUE +marshal_dump(argc, argv) + int argc; + VALUE* argv; +{ + VALUE obj, port, a1, a2; + int limit = -1; + struct dump_arg arg; + struct dump_call_arg c_arg; + + port = Qnil; + rb_scan_args(argc, argv, "12", &obj, &a1, &a2); + if (argc == 3) { + if (!NIL_P(a2)) limit = NUM2INT(a2); + if (NIL_P(a1)) goto type_error; + port = a1; + } + else if (argc == 2) { + if (FIXNUM_P(a1)) limit = FIX2INT(a1); + else if (NIL_P(a1)) goto type_error; + else port = a1; + } + arg.dest = 0; + if (!NIL_P(port)) { + if (!rb_respond_to(port, s_write)) { + type_error: + rb_raise(rb_eTypeError, "instance of IO needed"); + } + arg.str = rb_str_buf_new(0); + arg.dest = port; + if (rb_respond_to(port, s_binmode)) { + rb_funcall2(port, s_binmode, 0, 0); + } + } + else { + port = rb_str_buf_new(0); + arg.str = port; + } + + arg.symbols = st_init_numtable(); + arg.data = st_init_numtable(); + arg.taint = Qfalse; + c_arg.obj = obj; + c_arg.arg = &arg; + c_arg.limit = limit; + + w_byte(MARSHAL_MAJOR, &arg); + w_byte(MARSHAL_MINOR, &arg); + + rb_ensure(dump, (VALUE)&c_arg, dump_ensure, (VALUE)&arg); + + return port; +} + +struct load_arg { + VALUE src; + long offset; + st_table *symbols; + VALUE data; + VALUE proc; + int taint; +}; + +static VALUE r_object _((struct load_arg *arg)); + +static int +r_byte(arg) + struct load_arg *arg; +{ + int c; + + if (TYPE(arg->src) == T_STRING) { + if (RSTRING(arg->src)->len > arg->offset) { + c = (unsigned char)RSTRING(arg->src)->ptr[arg->offset++]; + } + else { + rb_raise(rb_eArgError, "marshal data too short"); + } + } + else { + VALUE src = arg->src; + VALUE v = rb_funcall2(src, s_getc, 0, 0); + if (NIL_P(v)) rb_eof_error(); + c = (unsigned char)FIX2INT(v); + } + return c; +} + +static void 
+long_toobig(size) + int size; +{ + rb_raise(rb_eTypeError, "long too big for this architecture (size %d, given %d)", + sizeof(long), size); +} + +#undef SIGN_EXTEND_CHAR +#if __STDC__ +# define SIGN_EXTEND_CHAR(c) ((signed char)(c)) +#else /* not __STDC__ */ +/* As in Harbison and Steele. */ +# define SIGN_EXTEND_CHAR(c) ((((unsigned char)(c)) ^ 128) - 128) +#endif + +static long +r_long(arg) + struct load_arg *arg; +{ + register long x; + int c = SIGN_EXTEND_CHAR(r_byte(arg)); + long i; + + if (c == 0) return 0; + if (c > 0) { + if (4 < c && c < 128) { + return c - 5; + } + if (c > sizeof(long)) long_toobig(c); + x = 0; + for (i=0;i<c;i++) { + x |= (long)r_byte(arg) << (8*i); + } + } + else { + if (-129 < c && c < -4) { + return c + 5; + } + c = -c; + if (c > sizeof(long)) long_toobig(c); + x = -1; + for (i=0;i<c;i++) { + x &= ~((long)0xff << (8*i)); + x |= (long)r_byte(arg) << (8*i); + } + } + return x; +} + +#define r_bytes(arg) r_bytes0(r_long(arg), (arg)) + +static VALUE +r_bytes0(len, arg) + long len; + struct load_arg *arg; +{ + VALUE str; + + if (len == 0) return rb_str_new(0, 0); + if (TYPE(arg->src) == T_STRING) { + if (RSTRING(arg->src)->len > arg->offset) { + str = rb_str_new(RSTRING(arg->src)->ptr+arg->offset, len); + arg->offset += len; + } + else { + too_short: + rb_raise(rb_eArgError, "marshal data too short"); + } + } + else { + VALUE src = arg->src; + VALUE n = LONG2NUM(len); + str = rb_funcall2(src, s_read, 1, &n); + if (NIL_P(str)) goto too_short; + StringValue(str); + if (RSTRING(str)->len != len) goto too_short; + if (OBJ_TAINTED(str)) arg->taint = Qtrue; + } + return str; +} + +static ID +r_symlink(arg) + struct load_arg *arg; +{ + ID id; + long num = r_long(arg); + + if (st_lookup(arg->symbols, num, &id)) { + return id; + } + rb_raise(rb_eArgError, "bad symbol"); +} + +static ID +r_symreal(arg) + struct load_arg *arg; +{ + ID id; + + id = rb_intern(RSTRING(r_bytes(arg))->ptr); + st_insert(arg->symbols, arg->symbols->num_entries, id); + + 
return id; +} + +static ID +r_symbol(arg) + struct load_arg *arg; +{ + if (r_byte(arg) == TYPE_SYMLINK) { + return r_symlink(arg); + } + return r_symreal(arg); +} + +static char* +r_unique(arg) + struct load_arg *arg; +{ + return rb_id2name(r_symbol(arg)); +} + +static VALUE +r_string(arg) + struct load_arg *arg; +{ + return r_bytes(arg); +} + +static VALUE +r_entry(v, arg) + VALUE v; + struct load_arg *arg; +{ + rb_hash_aset(arg->data, INT2FIX(RHASH(arg->data)->tbl->num_entries), v); + if (arg->taint) OBJ_TAINT(v); + return v; +} + +static void +r_ivar(obj, arg) + VALUE obj; + struct load_arg *arg; +{ + long len; + + len = r_long(arg); + if (len > 0) { + while (len--) { + ID id = r_symbol(arg); + VALUE val = r_object(arg); + rb_ivar_set(obj, id, val); + } + } +} + +static VALUE +path2class(path) + char *path; +{ + VALUE v = rb_path2class(path); + + if (TYPE(v) != T_CLASS) { + rb_raise(rb_eArgError, "%s does not refer class", path); + } + return v; +} + +static VALUE +path2module(path) + char *path; +{ + VALUE v = rb_path2class(path); + + if (TYPE(v) != T_MODULE) { + rb_raise(rb_eArgError, "%s does not refer module", path); + } + return v; +} + +static VALUE +r_object0(arg, proc, ivp, extmod) + struct load_arg *arg; + VALUE proc; + int *ivp; + VALUE extmod; +{ + VALUE v = Qnil; + int type = r_byte(arg); + long id; + + switch (type) { + case TYPE_LINK: + id = r_long(arg); + v = rb_hash_aref(arg->data, LONG2FIX(id)); + if (NIL_P(v)) { + rb_raise(rb_eArgError, "dump format error (unlinked)"); + } + return v; + + case TYPE_IVAR: + { + int ivar = Qtrue; + + v = r_object0(arg, 0, &ivar, extmod); + if (ivar) r_ivar(v, arg); + } + break; + + case TYPE_EXTENDED: + { + VALUE m = path2module(r_unique(arg)); + + if (NIL_P(extmod)) extmod = rb_ary_new2(0); + rb_ary_push(extmod, m); + + v = r_object0(arg, 0, 0, extmod); + while (RARRAY(extmod)->len > 0) { + m = rb_ary_pop(extmod); + rb_extend_object(v, m); + } + } + break; + + case TYPE_UCLASS: + { + VALUE c = 
path2class(r_unique(arg)); + + if (FL_TEST(c, FL_SINGLETON)) { + rb_raise(rb_eTypeError, "singleton can't be loaded"); + } + v = r_object0(arg, 0, 0, extmod); + if (rb_special_const_p(v) || TYPE(v) == T_OBJECT || TYPE(v) == T_CLASS) { + format_error: + rb_raise(rb_eArgError, "dump format error (user class)"); + } + if (TYPE(v) == T_MODULE || !RTEST(rb_class_inherited_p(c, RBASIC(v)->klass))) { + VALUE tmp = rb_obj_alloc(c); + + if (TYPE(v) != TYPE(tmp)) goto format_error; + } + RBASIC(v)->klass = c; + } + break; + + case TYPE_NIL: + v = Qnil; + break; + + case TYPE_TRUE: + v = Qtrue; + break; + + case TYPE_FALSE: + v = Qfalse; + break; + + case TYPE_FIXNUM: + { + long i = r_long(arg); + v = LONG2FIX(i); + } + break; + + case TYPE_FLOAT: + { + double d, t = 0.0; + VALUE str = r_bytes(arg); + const char *ptr = RSTRING(str)->ptr; + + if (strcmp(ptr, "nan") == 0) { + d = t / t; + } + else if (strcmp(ptr, "inf") == 0) { + d = 1.0 / t; + } + else if (strcmp(ptr, "-inf") == 0) { + d = -1.0 / t; + } + else { + char *e; + d = strtod(ptr, &e); + d = load_mantissa(d, e, RSTRING(str)->len - (e - ptr)); + } + v = rb_float_new(d); + r_entry(v, arg); + } + break; + + case TYPE_BIGNUM: + { + long len; + BDIGIT *digits; + VALUE data; + + NEWOBJ(big, struct RBignum); + OBJSETUP(big, rb_cBignum, T_BIGNUM); + big->sign = (r_byte(arg) == '+'); + len = r_long(arg); + data = r_bytes0(len * 2, arg); +#if SIZEOF_BDIGITS == SIZEOF_SHORT + big->len = len; +#else + big->len = (len + 1) * 2 / sizeof(BDIGIT); +#endif + big->digits = digits = ALLOC_N(BDIGIT, big->len); + MEMCPY(digits, RSTRING(data)->ptr, char, len * 2); +#if SIZEOF_BDIGITS > SIZEOF_SHORT + MEMZERO((char *)digits + len * 2, char, + big->len * sizeof(BDIGIT) - len * 2); +#endif + len = big->len; + while (len > 0) { + unsigned char *p = (unsigned char *)digits; + BDIGIT num = 0; +#if SIZEOF_BDIGITS > SIZEOF_SHORT + int shift = 0; + int i; + + for (i=0; i<SIZEOF_BDIGITS; i++) { + num |= (int)p[i] << shift; + shift += 8; + } +#else 
+ num = p[0] | (p[1] << 8); +#endif + *digits++ = num; + len--; + } + v = rb_big_norm((VALUE)big); + r_entry(v, arg); + } + break; + + case TYPE_STRING: + v = r_entry(r_string(arg), arg); + break; + + case TYPE_REGEXP: + { + volatile VALUE str = r_bytes(arg); + int options = r_byte(arg); + v = r_entry(rb_reg_new(RSTRING(str)->ptr, RSTRING(str)->len, options), arg); + } + break; + + case TYPE_ARRAY: + { + volatile long len = r_long(arg); /* gcc 2.7.2.3 -O2 bug?? */ + + v = rb_ary_new2(len); + r_entry(v, arg); + while (len--) { + rb_ary_push(v, r_object(arg)); + } + } + break; + + case TYPE_HASH: + case TYPE_HASH_DEF: + { + long len = r_long(arg); + + v = rb_hash_new(); + r_entry(v, arg); + while (len--) { + VALUE key = r_object(arg); + VALUE value = r_object(arg); + rb_hash_aset(v, key, value); + } + if (type == TYPE_HASH_DEF) { + RHASH(v)->ifnone = r_object(arg); + } + } + break; + + case TYPE_STRUCT: + { + VALUE klass, mem, values; + volatile long i; /* gcc 2.7.2.3 -O2 bug?? */ + long len; + ID slot; + + klass = path2class(r_unique(arg)); + mem = rb_struct_s_members(klass); + if (mem == Qnil) { + rb_raise(rb_eTypeError, "uninitialized struct"); + } + len = r_long(arg); + + values = rb_ary_new2(len); + for (i=0; i<len; i++) { + rb_ary_push(values, Qnil); + } + v = rb_struct_alloc(klass, values); + r_entry(v, arg); + for (i=0; i<len; i++) { + slot = r_symbol(arg); + + if (RARRAY(mem)->ptr[i] != ID2SYM(slot)) { + rb_raise(rb_eTypeError, "struct %s not compatible (:%s for :%s)", + rb_class2name(klass), + rb_id2name(slot), + rb_id2name(SYM2ID(RARRAY(mem)->ptr[i]))); + } + rb_struct_aset(v, LONG2FIX(i), r_object(arg)); + } + } + break; + + case TYPE_USERDEF: + { + VALUE klass = path2class(r_unique(arg)); + VALUE data; + + if (!rb_respond_to(klass, s_load)) { + rb_raise(rb_eTypeError, "class %s needs to have method `_load'", + rb_class2name(klass)); + } + data = r_string(arg); + if (ivp) { + r_ivar(data, arg); + *ivp = Qfalse; + } + v = rb_funcall(klass, s_load, 1, 
data); + r_entry(v, arg); + } + break; + + case TYPE_USRMARSHAL: + { + VALUE klass = path2class(r_unique(arg)); + VALUE data; + + v = rb_obj_alloc(klass); + if (! NIL_P(extmod)) { + while (RARRAY(extmod)->len > 0) { + VALUE m = rb_ary_pop(extmod); + rb_extend_object(v, m); + } + } + if (!rb_respond_to(v, s_mload)) { + rb_raise(rb_eTypeError, "instance of %s needs to have method `marshal_load'", + rb_class2name(klass)); + } + r_entry(v, arg); + data = r_object(arg); + rb_funcall(v, s_mload, 1, data); + } + break; + + case TYPE_OBJECT: + { + VALUE klass = path2class(r_unique(arg)); + + v = rb_obj_alloc(klass); + if (TYPE(v) != T_OBJECT) { + rb_raise(rb_eArgError, "dump format error"); + } + r_entry(v, arg); + r_ivar(v, arg); + } + break; + + case TYPE_DATA: + { + VALUE klass = path2class(r_unique(arg)); + if (rb_respond_to(klass, s_alloc)) { + static int warn = Qtrue; + if (warn) { + rb_warn("define `allocate' instead of `_alloc'"); + warn = Qfalse; + } + v = rb_funcall(klass, s_alloc, 0); + } + else { + v = rb_obj_alloc(klass); + } + if (TYPE(v) != T_DATA) { + rb_raise(rb_eArgError, "dump format error"); + } + r_entry(v, arg); + if (!rb_respond_to(v, s_load_data)) { + rb_raise(rb_eTypeError, + "class %s needs to have instance method `_load_data'", + rb_class2name(klass)); + } + rb_funcall(v, s_load_data, 1, r_object0(arg, 0, 0, extmod)); + } + break; + + case TYPE_MODULE_OLD: + { + volatile VALUE str = r_bytes(arg); + + v = rb_path2class(RSTRING(str)->ptr); + r_entry(v, arg); + } + break; + + case TYPE_CLASS: + { + volatile VALUE str = r_bytes(arg); + + v = path2class(RSTRING(str)->ptr); + r_entry(v, arg); + } + break; + + case TYPE_MODULE: + { + volatile VALUE str = r_bytes(arg); + + v = path2module(RSTRING(str)->ptr); + r_entry(v, arg); + } + break; + + case TYPE_SYMBOL: + v = ID2SYM(r_symreal(arg)); + break; + + case TYPE_SYMLINK: + return ID2SYM(r_symlink(arg)); + + default: + rb_raise(rb_eArgError, "dump format error(0x%x)", type); + break; + } + if (proc) { + 
rb_funcall(proc, rb_intern("call"), 1, v); + } + return v; +} + +static VALUE +r_object(arg) + struct load_arg *arg; +{ + return r_object0(arg, arg->proc, 0, Qnil); +} + +static VALUE +load(arg) + struct load_arg *arg; +{ + return r_object(arg); +} + +static VALUE +load_ensure(arg) + struct load_arg *arg; +{ + st_free_table(arg->symbols); + return 0; +} + +/* + * call-seq: + * load( source [, proc] ) => obj + * restore( source [, proc] ) => obj + * + * Returns the result of converting the serialized data in source into a + * Ruby object (possibly with associated subordinate objects). source + * may be either an instance of IO or an object that responds to + * to_str. If proc is specified, it will be passed each object as it + * is deserialized. + */ +static VALUE +marshal_load(argc, argv) + int argc; + VALUE *argv; +{ + VALUE port, proc; + int major, minor; + VALUE v; + struct load_arg arg; + + rb_scan_args(argc, argv, "11", &port, &proc); + if (rb_respond_to(port, rb_intern("to_str"))) { + arg.taint = OBJ_TAINTED(port); /* original taintedness */ + StringValue(port); /* possible conversion */ + } + else if (rb_respond_to(port, s_getc) && rb_respond_to(port, s_read)) { + if (rb_respond_to(port, s_binmode)) { + rb_funcall2(port, s_binmode, 0, 0); + } + arg.taint = Qtrue; + } + else { + rb_raise(rb_eTypeError, "instance of IO needed"); + } + arg.src = port; + arg.offset = 0; + + major = r_byte(&arg); + minor = r_byte(&arg); + if (major != MARSHAL_MAJOR || minor > MARSHAL_MINOR) { + rb_raise(rb_eTypeError, "incompatible marshal file format (can't be read)\n\ +\tformat version %d.%d required; %d.%d given", + MARSHAL_MAJOR, MARSHAL_MINOR, major, minor); + } + if (RTEST(ruby_verbose) && minor != MARSHAL_MINOR) { + rb_warn("incompatible marshal file format (can be read)\n\ +\tformat version %d.%d required; %d.%d given", + MARSHAL_MAJOR, MARSHAL_MINOR, major, minor); + } + + arg.symbols = st_init_numtable(); + arg.data = rb_hash_new(); + if (NIL_P(proc)) arg.proc = 0; + 
else arg.proc = proc; + v = rb_ensure(load, (VALUE)&arg, load_ensure, (VALUE)&arg); + + return v; +} + +/* + * The marshaling library converts collections of Ruby objects into a + * byte stream, allowing them to be stored outside the currently + * active script. This data may subsequently be read and the original + * objects reconstituted. + * Marshaled data has major and minor version numbers stored along + * with the object information. In normal use, marshaling can only + * load data written with the same major version number and an equal + * or lower minor version number. If Ruby's ``verbose'' flag is set + * (normally using -d, -v, -w, or --verbose) the major and minor + * numbers must match exactly. Marshal versioning is independent of + * Ruby's version numbers. You can extract the version by reading the + * first two bytes of marshaled data. + * + * str = Marshal.dump("thing") + * RUBY_VERSION #=> "1.8.0" + * str[0] #=> 4 + * str[1] #=> 8 + * + * Some objects cannot be dumped: if the objects to be dumped include + * bindings, procedure or method objects, instances of class IO, or + * singleton objects, a TypeError will be raised. + * If your class has special serialization needs (for example, if you + * want to serialize in some specific format), or if it contains + * objects that would otherwise not be serializable, you can implement + * your own serialization strategy by defining two methods, _dump and + * _load: + * The instance method _dump should return a String object containing + * all the information necessary to reconstitute objects of this class + * and all referenced objects up to a maximum depth given as an integer + * parameter (a value of -1 implies that you should disable depth checking). + * The class method _load should take a String and return an object of this class. 
+ */ +void +Init_marshal() +{ + VALUE rb_mMarshal = rb_define_module("Marshal"); + + s_dump = rb_intern("_dump"); + s_load = rb_intern("_load"); + s_mdump = rb_intern("marshal_dump"); + s_mload = rb_intern("marshal_load"); + s_dump_data = rb_intern("_dump_data"); + s_load_data = rb_intern("_load_data"); + s_alloc = rb_intern("_alloc"); + s_getc = rb_intern("getc"); + s_read = rb_intern("read"); + s_write = rb_intern("write"); + s_binmode = rb_intern("binmode"); + + rb_define_module_function(rb_mMarshal, "dump", marshal_dump, -1); + rb_define_module_function(rb_mMarshal, "load", marshal_load, -1); + rb_define_module_function(rb_mMarshal, "restore", marshal_load, -1); + + rb_define_const(rb_mMarshal, "MAJOR_VERSION", INT2FIX(MARSHAL_MAJOR)); + rb_define_const(rb_mMarshal, "MINOR_VERSION", INT2FIX(MARSHAL_MINOR)); +} + +VALUE +rb_marshal_dump(obj, port) + VALUE obj, port; +{ + int argc = 1; + VALUE argv[2]; + + argv[0] = obj; + argv[1] = port; + if (!NIL_P(port)) argc = 2; + return marshal_dump(argc, argv); +} + +VALUE +rb_marshal_load(port) + VALUE port; +{ + return marshal_load(1, &port); +} +/********************************************************************** + + math.c - + + $Author: matz $ + $Date: 2004/09/03 17:38:34 $ + created at: Tue Jan 25 14:12:56 JST 1994 + + Copyright (C) 1993-2003 Yukihiro Matsumoto + +**********************************************************************/ + +#include "ruby.h" +#include <math.h> +#include <errno.h> + +VALUE rb_mMath; + +#define Need_Float(x) (x) = rb_Float(x) +#define Need_Float2(x,y) do {\ + Need_Float(x);\ + Need_Float(y);\ +} while (0) + + +/* + * call-seq: + * Math.atan2(y, x) => float + * + * Computes the arc tangent given <i>y</i> and <i>x</i>. Returns + * -PI..PI. 
+ * + */ + +static VALUE +math_atan2(obj, y, x) + VALUE obj, x, y; +{ + Need_Float2(y, x); + return rb_float_new(atan2(RFLOAT(y)->value, RFLOAT(x)->value)); +} + + + +/* + * call-seq: + * Math.cos(x) => float + * + * Computes the cosine of <i>x</i> (expressed in radians). Returns + * -1..1. + */ + +static VALUE +math_cos(obj, x) + VALUE obj, x; +{ + Need_Float(x); + return rb_float_new(cos(RFLOAT(x)->value)); +} + +/* + * call-seq: + * Math.sin(x) => float + * + * Computes the sine of <i>x</i> (expressed in radians). Returns + * -1..1. + */ + +static VALUE +math_sin(obj, x) + VALUE obj, x; +{ + Need_Float(x); + + return rb_float_new(sin(RFLOAT(x)->value)); +} + + +/* + * call-seq: + * Math.tan(x) => float + * + * Returns the tangent of <i>x</i> (expressed in radians). + */ + +static VALUE +math_tan(obj, x) + VALUE obj, x; +{ + Need_Float(x); + + return rb_float_new(tan(RFLOAT(x)->value)); +} + +/* + * call-seq: + * Math.acos(x) => float + * + * Computes the arc cosine of <i>x</i>. Returns 0..PI. + */ + +static VALUE +math_acos(obj, x) + VALUE obj, x; +{ + double d; + + Need_Float(x); + errno = 0; + d = acos(RFLOAT(x)->value); + if (errno) { + rb_sys_fail("acos"); + } + return rb_float_new(d); +} + +/* + * call-seq: + * Math.asin(x) => float + * + * Computes the arc sine of <i>x</i>. Returns 0..PI. + */ + +static VALUE +math_asin(obj, x) + VALUE obj, x; +{ + double d; + + Need_Float(x); + errno = 0; + d = asin(RFLOAT(x)->value); + if (errno) { + rb_sys_fail("asin"); + } + return rb_float_new(d); +} + +/* + * call-seq: + * Math.atan(x) => float + * + * Computes the arc tangent of <i>x</i>. Returns -{PI/2} .. {PI/2}. + */ + +static VALUE +math_atan(obj, x) + VALUE obj, x; +{ + Need_Float(x); + return rb_float_new(atan(RFLOAT(x)->value)); +} + +#ifndef HAVE_COSH +double +cosh(x) + double x; +{ + return (exp(x) + exp(-x)) / 2; +} +#endif + +/* + * call-seq: + * Math.cosh(x) => float + * + * Computes the hyperbolic cosine of <i>x</i> (expressed in radians). 
+ */ + +static VALUE +math_cosh(obj, x) + VALUE obj, x; +{ + Need_Float(x); + + return rb_float_new(cosh(RFLOAT(x)->value)); +} + +#ifndef HAVE_SINH +double +sinh(x) + double x; +{ + return (exp(x) - exp(-x)) / 2; +} +#endif + +/* + * call-seq: + * Math.sinh(x) => float + * + * Computes the hyperbolic sine of <i>x</i> (expressed in + * radians). + */ + +static VALUE +math_sinh(obj, x) + VALUE obj, x; +{ + Need_Float(x); + return rb_float_new(sinh(RFLOAT(x)->value)); +} + +#ifndef HAVE_TANH +double +tanh(x) + double x; +{ + return sinh(x) / cosh(x); +} +#endif + +/* + * call-seq: + * Math.tanh() => float + * + * Computes the hyperbolic tangent of <i>x</i> (expressed in + * radians). + */ + +static VALUE +math_tanh(obj, x) + VALUE obj, x; +{ + Need_Float(x); + return rb_float_new(tanh(RFLOAT(x)->value)); +} + +/* + * call-seq: + * Math.acosh(x) => float + * + * Computes the inverse hyperbolic cosine of <i>x</i>. + */ + +static VALUE +math_acosh(obj, x) + VALUE obj, x; +{ + double d; + + Need_Float(x); + errno = 0; + d = acosh(RFLOAT(x)->value); + if (errno) { + rb_sys_fail("acosh"); + } + return rb_float_new(d); +} + +/* + * call-seq: + * Math.asinh(x) => float + * + * Computes the inverse hyperbolic sine of <i>x</i>. + */ + +static VALUE +math_asinh(obj, x) + VALUE obj, x; +{ + Need_Float(x); + return rb_float_new(asinh(RFLOAT(x)->value)); +} + +/* + * call-seq: + * Math.atanh(x) => float + * + * Computes the inverse hyperbolic tangent of <i>x</i>. + */ + +static VALUE +math_atanh(obj, x) + VALUE obj, x; +{ + double d; + + Need_Float(x); + errno = 0; + d = atanh(RFLOAT(x)->value); + if (errno) { + rb_sys_fail("atanh"); + } + return rb_float_new(d); +} + +/* + * call-seq: + * Math.exp(x) => float + * + * Returns e**x. 
+ */ + +static VALUE +math_exp(obj, x) + VALUE obj, x; +{ + Need_Float(x); + return rb_float_new(exp(RFLOAT(x)->value)); +} + +#if defined __CYGWIN__ +# include <cygwin/version.h> +# if CYGWIN_VERSION_DLL_MAJOR < 1005 +# define nan(x) nan() +# endif +# define log(x) ((x) < 0.0 ? nan("") : log(x)) +# define log10(x) ((x) < 0.0 ? nan("") : log10(x)) +#endif + +/* + * call-seq: + * Math.log(numeric) => float + * + * Returns the natural logarithm of <i>numeric</i>. + */ + +static VALUE +math_log(obj, x) + VALUE obj, x; +{ + double d; + + Need_Float(x); + errno = 0; + d = log(RFLOAT(x)->value); + if (errno) { + rb_sys_fail("log"); + } + return rb_float_new(d); +} + +/* + * call-seq: + * Math.log10(numeric) => float + * + * Returns the base 10 logarithm of <i>numeric</i>. + */ + +static VALUE +math_log10(obj, x) + VALUE obj, x; +{ + double d; + + Need_Float(x); + errno = 0; + d = log10(RFLOAT(x)->value); + if (errno) { + rb_sys_fail("log10"); + } + return rb_float_new(d); +} + +/* + * call-seq: + * Math.sqrt(numeric) => float + * + * Returns the non-negative square root of <i>numeric</i>. Raises + * <code>ArgError</code> if <i>numeric</i> is less than zero. + */ + +static VALUE +math_sqrt(obj, x) + VALUE obj, x; +{ + double d; + + Need_Float(x); + errno = 0; + d = sqrt(RFLOAT(x)->value); + if (errno) { + rb_sys_fail("sqrt"); + } + return rb_float_new(d); +} + +/* + * call-seq: + * Math.frexp(numeric) => [ fraction, exponent ] + * + * Returns a two-element array containing the normalized fraction (a + * <code>Float</code>) and exponent (a <code>Fixnum</code>) of + * <i>numeric</i>. 
+ * + * fraction, exponent = Math.frexp(1234) #=> [0.6025390625, 11] + * fraction * 2**exponent #=> 1234.0 + */ + +static VALUE +math_frexp(obj, x) + VALUE obj, x; +{ + double d; + int exp; + + Need_Float(x); + + d = frexp(RFLOAT(x)->value, &exp); + return rb_assoc_new(rb_float_new(d), INT2NUM(exp)); +} + +/* + * call-seq: + * Math.ldexp(flt, int) -> float + * + * Returns the value of <i>flt</i>*(2**<i>int</i>). + * + * fraction, exponent = Math.frexp(1234) + * Math.ldexp(fraction, exponent) #=> 1234.0 + */ + +static VALUE +math_ldexp(obj, x, n) + VALUE obj, x, n; +{ + Need_Float(x); + return rb_float_new(ldexp(RFLOAT(x)->value, NUM2INT(n))); +} + +/* + * call-seq: + * Math.hypot(x, y) => float + * + * Returns sqrt(x**2 + y**2), the hypotenuse of a right-angled triangle + * with sides <i>x</i> and <i>y</i>. + * + * Math.hypot(3, 4) #=> 5.0 + */ + +static VALUE +math_hypot(obj, x, y) + VALUE obj, x, y; +{ + Need_Float2(x, y); + return rb_float_new(hypot(RFLOAT(x)->value, RFLOAT(y)->value)); +} + +/* + * call-seq: + * Math.erf(x) => float + * + * Calculates the error function of x. + */ + +static VALUE +math_erf(obj, x) + VALUE obj, x; +{ + Need_Float(x); + return rb_float_new(erf(RFLOAT(x)->value)); +} + +/* + * call-seq: + * Math.erfc(x) => float + * + * Calculates the complementary error function of x. + */ + +static VALUE +math_erfc(obj, x) + VALUE obj, x; +{ + Need_Float(x); + return rb_float_new(erfc(RFLOAT(x)->value)); +} + +/* + * The <code>Math</code> module contains module functions for basic + * trigonometric and transcendental functions. See class + * <code>Float</code> for a list of constants that + * define Ruby's floating point accuracy. 
+ */ + + +void +Init_Math() +{ + rb_mMath = rb_define_module("Math"); + +#ifdef M_PI + rb_define_const(rb_mMath, "PI", rb_float_new(M_PI)); +#else + rb_define_const(rb_mMath, "PI", rb_float_new(atan(1.0)*4.0)); +#endif + +#ifdef M_E + rb_define_const(rb_mMath, "E", rb_float_new(M_E)); +#else + rb_define_const(rb_mMath, "E", rb_float_new(exp(1.0))); +#endif + + rb_define_module_function(rb_mMath, "atan2", math_atan2, 2); + rb_define_module_function(rb_mMath, "cos", math_cos, 1); + rb_define_module_function(rb_mMath, "sin", math_sin, 1); + rb_define_module_function(rb_mMath, "tan", math_tan, 1); + + rb_define_module_function(rb_mMath, "acos", math_acos, 1); + rb_define_module_function(rb_mMath, "asin", math_asin, 1); + rb_define_module_function(rb_mMath, "atan", math_atan, 1); + + rb_define_module_function(rb_mMath, "cosh", math_cosh, 1); + rb_define_module_function(rb_mMath, "sinh", math_sinh, 1); + rb_define_module_function(rb_mMath, "tanh", math_tanh, 1); + + rb_define_module_function(rb_mMath, "acosh", math_acosh, 1); + rb_define_module_function(rb_mMath, "asinh", math_asinh, 1); + rb_define_module_function(rb_mMath, "atanh", math_atanh, 1); + + rb_define_module_function(rb_mMath, "exp", math_exp, 1); + rb_define_module_function(rb_mMath, "log", math_log, 1); + rb_define_module_function(rb_mMath, "log10", math_log10, 1); + rb_define_module_function(rb_mMath, "sqrt", math_sqrt, 1); + + rb_define_module_function(rb_mMath, "frexp", math_frexp, 1); + rb_define_module_function(rb_mMath, "ldexp", math_ldexp, 2); + + rb_define_module_function(rb_mMath, "hypot", math_hypot, 2); + + rb_define_module_function(rb_mMath, "erf", math_erf, 1); + rb_define_module_function(rb_mMath, "erfc", math_erfc, 1); +} +/********************************************************************** + + numeric.c - + + $Author: matz $ + $Date: 2005/04/18 06:38:30 $ + created at: Fri Aug 13 18:33:09 JST 1993 + + Copyright (C) 1993-2003 Yukihiro Matsumoto + 
**********************************************************************/

#include "ruby.h"
#include "env.h"
#include <ctype.h>
#include <math.h>
#include <stdio.h>

#if defined(__FreeBSD__) && __FreeBSD__ < 4
#include <floatingpoint.h>
#endif

#ifdef HAVE_FLOAT_H
#include <float.h>
#endif

#ifdef HAVE_IEEEFP_H
#include <ieeefp.h>
#endif

/* use IEEE 64bit values if not defined */
/* (each fallback below applies only when the macro is still undefined
 * after the headers above have been included) */
#ifndef FLT_RADIX
#define FLT_RADIX 2
#endif
#ifndef FLT_ROUNDS
#define FLT_ROUNDS 1
#endif
#ifndef DBL_MIN
#define DBL_MIN 2.2250738585072014e-308
#endif
#ifndef DBL_MAX
#define DBL_MAX 1.7976931348623157e+308
#endif
#ifndef DBL_MIN_EXP
#define DBL_MIN_EXP (-1021)
#endif
#ifndef DBL_MAX_EXP
#define DBL_MAX_EXP 1024
#endif
#ifndef DBL_MIN_10_EXP
#define DBL_MIN_10_EXP (-307)
#endif
#ifndef DBL_MAX_10_EXP
#define DBL_MAX_10_EXP 308
#endif
#ifndef DBL_DIG
#define DBL_DIG 15
#endif
#ifndef DBL_MANT_DIG
#define DBL_MANT_DIG 53
#endif
#ifndef DBL_EPSILON
#define DBL_EPSILON 2.2204460492503131e-16
#endif

/* method-name IDs shared by this file; assigned outside this excerpt */
static ID id_coerce, id_to_i, id_eq;

/* numeric class objects */
VALUE rb_cNumeric;
VALUE rb_cFloat;
VALUE rb_cInteger;
VALUE rb_cFixnum;

/* exception classes raised by numeric operations */
VALUE rb_eZeroDivError;
VALUE rb_eFloatDomainError;

/* Raises ZeroDivisionError with the message "divided by 0". */
void
rb_num_zerodiv()
{
    rb_raise(rb_eZeroDivError, "divided by 0");
}


/*
 *  call-seq:
 *     num.coerce(numeric)   => array
 *
 *  If <i>numeric</i> is the same type as <i>num</i>, returns an array
 *  containing <i>numeric</i> and <i>num</i>. Otherwise, returns an
 *  array with both <i>numeric</i> and <i>num</i> represented as
 *  <code>Float</code> objects. This coercion mechanism is used by
 *  Ruby to handle mixed-type numeric operations: it is intended to
 *  find a compatible common type between the two operands of the operator.
+ * + * 1.coerce(2.5) #=> [2.5, 1.0] + * 1.2.coerce(3) #=> [3.0, 1.2] + * 1.coerce(2) #=> [2, 1] + */ + +static VALUE +num_coerce(x, y) + VALUE x, y; +{ + if (CLASS_OF(x) == CLASS_OF(y)) + return rb_assoc_new(y, x); + return rb_assoc_new(rb_Float(y), rb_Float(x)); +} + +static VALUE +coerce_body(x) + VALUE *x; +{ + return rb_funcall(x[1], id_coerce, 1, x[0]); +} + +static VALUE +coerce_rescue(x) + VALUE *x; +{ + volatile VALUE v = rb_inspect(x[1]); + + rb_raise(rb_eTypeError, "%s can't be coerced into %s", + rb_special_const_p(x[1])? + RSTRING(v)->ptr: + rb_obj_classname(x[1]), + rb_obj_classname(x[0])); + return Qnil; /* dummy */ +} + +static int +do_coerce(x, y, err) + VALUE *x, *y; + int err; +{ + VALUE ary; + VALUE a[2]; + + a[0] = *x; a[1] = *y; + + ary = rb_rescue(coerce_body, (VALUE)a, err?coerce_rescue:0, (VALUE)a); + if (TYPE(ary) != T_ARRAY || RARRAY(ary)->len != 2) { + if (err) { + rb_raise(rb_eTypeError, "coerce must return [x, y]"); + } + return Qfalse; + } + + *x = RARRAY(ary)->ptr[0]; + *y = RARRAY(ary)->ptr[1]; + return Qtrue; +} + +VALUE +rb_num_coerce_bin(x, y) + VALUE x, y; +{ + do_coerce(&x, &y, Qtrue); + return rb_funcall(x, rb_frame_this_func(), 1, y); +} + +VALUE +rb_num_coerce_cmp(x, y) + VALUE x, y; +{ + if (do_coerce(&x, &y, Qfalse)) + return rb_funcall(x, rb_frame_this_func(), 1, y); + return Qnil; +} + +VALUE +rb_num_coerce_relop(x, y) + VALUE x, y; +{ + VALUE c, x0 = x, y0 = y; + + if (!do_coerce(&x, &y, Qfalse) || + NIL_P(c = rb_funcall(x, rb_frame_this_func(), 1, y))) { + rb_cmperr(x0, y0); + return Qnil; /* not reached */ + } + return c; +} + +/* + * Trap attempts to add methods to <code>Numeric</code> objects. 
Always + * raises a <code>TypeError</code> + */ + +static VALUE +num_sadded(x, name) + VALUE x, name; +{ + ruby_frame = ruby_frame->prev; /* pop frame for "singleton_method_added" */ + /* Numerics should be values; singleton_methods should not be added to them */ + rb_raise(rb_eTypeError, + "can't define singleton method \"%s\" for %s", + rb_id2name(rb_to_id(name)), + rb_obj_classname(x)); + return Qnil; /* not reached */ +} + +/* :nodoc: */ +static VALUE +num_init_copy(x, y) + VALUE x, y; +{ + /* Numerics are immutable values, which should not be copied */ + rb_raise(rb_eTypeError, "can't copy %s", rb_obj_classname(x)); + return Qnil; /* not reached */ +} + +/* + * call-seq: + * +num => num + * + * Unary Plus---Returns the receiver's value. + */ + +static VALUE +num_uplus(num) + VALUE num; +{ + return num; +} + +/* + * call-seq: + * -num => numeric + * + * Unary Minus---Returns the receiver's value, negated. + */ + +static VALUE +num_uminus(num) + VALUE num; +{ + VALUE zero; + + zero = INT2FIX(0); + do_coerce(&zero, &num, Qtrue); + + return rb_funcall(zero, '-', 1, num); +} + +/* + * call-seq: + * num.quo(numeric) => result + * + * Equivalent to <code>Numeric#/</code>, but overridden in subclasses. + */ + +static VALUE +num_quo(x, y) + VALUE x, y; +{ + return rb_funcall(x, '/', 1, y); +} + + +/* + * call-seq: + * num.div(numeric) => integer + * + * Uses <code>/</code> to perform division, then converts the result to + * an integer. <code>Numeric</code> does not define the <code>/</code> + * operator; this is left to subclasses. + */ + +static VALUE +num_div(x, y) + VALUE x, y; +{ + return rb_Integer(rb_funcall(x, '/', 1, y)); +} + + + +/* + * call-seq: + * num.divmod( aNumeric ) -> anArray + * + * Returns an array containing the quotient and modulus obtained by + * dividing <i>num</i> by <i>aNumeric</i>. 
If <code>q, r = + * x.divmod(y)</code>, then + * + * q = floor(float(x)/float(y)) + * x = q*y + r + * + * The quotient is rounded toward -infinity, as shown in the following table: + * + * a | b | a.divmod(b) | a/b | a.modulo(b) | a.remainder(b) + * ------+-----+---------------+---------+-------------+--------------- + * 13 | 4 | 3, 1 | 3 | 1 | 1 + * ------+-----+---------------+---------+-------------+--------------- + * 13 | -4 | -4, -3 | -3 | -3 | 1 + * ------+-----+---------------+---------+-------------+--------------- + * -13 | 4 | -4, 3 | -4 | 3 | -1 + * ------+-----+---------------+---------+-------------+--------------- + * -13 | -4 | 3, -1 | 3 | -1 | -1 + * ------+-----+---------------+---------+-------------+--------------- + * 11.5 | 4 | 2.0, 3.5 | 2.875 | 3.5 | 3.5 + * ------+-----+---------------+---------+-------------+--------------- + * 11.5 | -4 | -3.0, -0.5 | -2.875 | -0.5 | 3.5 + * ------+-----+---------------+---------+-------------+--------------- + * -11.5 | 4 | -3.0 0.5 | -2.875 | 0.5 | -3.5 + * ------+-----+---------------+---------+-------------+--------------- + * -11.5 | -4 | 2.0 -3.5 | 2.875 | -3.5 | -3.5 + * + * + * Examples + * 11.divmod(3) #=> [3, 2] + * 11.divmod(-3) #=> [-4, -1] + * 11.divmod(3.5) #=> [3.0, 0.5] + * (-11).divmod(3.5) #=> [-4.0, 3.0] + * (11.5).divmod(3.5) #=> [3.0, 1.0] + */ + +static VALUE +num_divmod(x, y) + VALUE x, y; +{ + return rb_assoc_new(num_div(x, y), rb_funcall(x, '%', 1, y)); +} + +/* + * call-seq: + * num.modulo(numeric) => result + * + * Equivalent to + * <i>num</i>.<code>divmod(</code><i>aNumeric</i><code>)[1]</code>. + */ + +static VALUE +num_modulo(x, y) + VALUE x, y; +{ + return rb_funcall(x, '%', 1, y); +} + +/* + * call-seq: + * num.remainder(numeric) => result + * + * If <i>num</i> and <i>numeric</i> have different signs, returns + * <em>mod</em>-<i>numeric</i>; otherwise, returns <em>mod</em>. 
In + * both cases <em>mod</em> is the value + * <i>num</i>.<code>modulo(</code><i>numeric</i><code>)</code>. The + * differences between <code>remainder</code> and modulo + * (<code>%</code>) are shown in the table under <code>Numeric#divmod</code>. + */ + +static VALUE +num_remainder(x, y) + VALUE x, y; +{ + VALUE z = rb_funcall(x, '%', 1, y); + + if ((!rb_equal(z, INT2FIX(0))) && + ((RTEST(rb_funcall(x, '<', 1, INT2FIX(0))) && + RTEST(rb_funcall(y, '>', 1, INT2FIX(0)))) || + (RTEST(rb_funcall(x, '>', 1, INT2FIX(0))) && + RTEST(rb_funcall(y, '<', 1, INT2FIX(0)))))) { + return rb_funcall(z, '-', 1, y); + } + return z; +} + +/* + * call-seq: + * num.integer? -> true or false + * + * Returns <code>true</code> if <i>num</i> is an <code>Integer</code> + * (including <code>Fixnum</code> and <code>Bignum</code>). + */ + +static VALUE +num_int_p(num) + VALUE num; +{ + return Qfalse; +} + +/* + * call-seq: + * num.abs => num or numeric + * + * Returns the absolute value of <i>num</i>. + * + * 12.abs #=> 12 + * (-34.56).abs #=> 34.56 + * -34.56.abs #=> 34.56 + */ + +static VALUE +num_abs(num) + VALUE num; +{ + if (RTEST(rb_funcall(num, '<', 1, INT2FIX(0)))) { + return rb_funcall(num, rb_intern("-@"), 0); + } + return num; +} + + +/* + * call-seq: + * num.zero? => true or false + * + * Returns <code>true</code> if <i>num</i> has a zero value. + */ + +static VALUE +num_zero_p(num) + VALUE num; +{ + if (rb_equal(num, INT2FIX(0))) { + return Qtrue; + } + return Qfalse; +} + + +/* + * call-seq: + * num.nonzero? => num or nil + * + * Returns <i>num</i> if <i>num</i> is not zero, <code>nil</code> + * otherwise. This behavior is useful when chaining comparisons: + * + * a = %w( z Bb bB bb BB a aA Aa AA A ) + * b = a.sort {|a,b| (a.downcase <=> b.downcase).nonzero? 
|| a <=> b } + * b #=> ["A", "a", "AA", "Aa", "aA", "BB", "Bb", "bB", "bb", "z"] + */ + +static VALUE +num_nonzero_p(num) + VALUE num; +{ + if (RTEST(rb_funcall(num, rb_intern("zero?"), 0, 0))) { + return Qnil; + } + return num; +} + +/* + * call-seq: + * num.to_int => integer + * + * Invokes the child class's <code>to_i</code> method to convert + * <i>num</i> to an integer. + */ + +static VALUE +num_to_int(num) + VALUE num; +{ + return rb_funcall(num, id_to_i, 0, 0); +} + + +/******************************************************************** + * + * Document-class: Float + * + * <code>Float</code> objects represent real numbers using the native + * architecture's double-precision floating point representation. + */ + +VALUE +rb_float_new(d) + double d; +{ + NEWOBJ(flt, struct RFloat); + OBJSETUP(flt, rb_cFloat, T_FLOAT); + + flt->value = d; + return (VALUE)flt; +} + +/* + * call-seq: + * flt.to_s => string + * + * Returns a string containing a representation of self. As well as a + * fixed or exponential form of the number, the call may return + * ``<code>NaN</code>'', ``<code>Infinity</code>'', and + * ``<code>-Infinity</code>''. + */ + +static VALUE +flo_to_s(flt) + VALUE flt; +{ + char buf[32]; + double value = RFLOAT(flt)->value; + char *p, *e; + + if (isinf(value)) + return rb_str_new2(value < 0 ? "-Infinity" : "Infinity"); + else if(isnan(value)) + return rb_str_new2("NaN"); + + sprintf(buf, "%#.15g", value); /* ensure to print decimal point */ + if (!(e = strchr(buf, 'e'))) { + e = buf + strlen(buf); + } + if (!ISDIGIT(e[-1])) { /* reformat if ended with decimal point (ex 111111111111111.) 
*/ + sprintf(buf, "%#.14e", value); + if (!(e = strchr(buf, 'e'))) { + e = buf + strlen(buf); + } + } + p = e; + while (p[-1]=='0' && ISDIGIT(p[-2])) + p--; + memmove(p, e, strlen(e)+1); + return rb_str_new2(buf); +} + +/* + * MISSING: documentation + */ + +static VALUE +flo_coerce(x, y) + VALUE x, y; +{ + return rb_assoc_new(rb_Float(y), x); +} + +/* + * call-seq: + * -float => float + * + * Returns float, negated. + */ + +static VALUE +flo_uminus(flt) + VALUE flt; +{ + return rb_float_new(-RFLOAT(flt)->value); +} + +/* + * call-seq: + * float + other => float + * + * Returns a new float which is the sum of <code>float</code> + * and <code>other</code>. + */ + +static VALUE +flo_plus(x, y) + VALUE x, y; +{ + switch (TYPE(y)) { + case T_FIXNUM: + return rb_float_new(RFLOAT(x)->value + (double)FIX2LONG(y)); + case T_BIGNUM: + return rb_float_new(RFLOAT(x)->value + rb_big2dbl(y)); + case T_FLOAT: + return rb_float_new(RFLOAT(x)->value + RFLOAT(y)->value); + default: + return rb_num_coerce_bin(x, y); + } +} + +/* + * call-seq: + * float + other => float + * + * Returns a new float which is the difference of <code>float</code> + * and <code>other</code>. + */ + +static VALUE +flo_minus(x, y) + VALUE x, y; +{ + switch (TYPE(y)) { + case T_FIXNUM: + return rb_float_new(RFLOAT(x)->value - (double)FIX2LONG(y)); + case T_BIGNUM: + return rb_float_new(RFLOAT(x)->value - rb_big2dbl(y)); + case T_FLOAT: + return rb_float_new(RFLOAT(x)->value - RFLOAT(y)->value); + default: + return rb_num_coerce_bin(x, y); + } +} + +/* + * call-seq: + * float * other => float + * + * Returns a new float which is the product of <code>float</code> + * and <code>other</code>. 
+ */ + +static VALUE +flo_mul(x, y) + VALUE x, y; +{ + switch (TYPE(y)) { + case T_FIXNUM: + return rb_float_new(RFLOAT(x)->value * (double)FIX2LONG(y)); + case T_BIGNUM: + return rb_float_new(RFLOAT(x)->value * rb_big2dbl(y)); + case T_FLOAT: + return rb_float_new(RFLOAT(x)->value * RFLOAT(y)->value); + default: + return rb_num_coerce_bin(x, y); + } +} + +/* + * call-seq: + * float / other => float + * + * Returns a new float which is the result of dividing + * <code>float</code> by <code>other</code>. + */ + +static VALUE +flo_div(x, y) + VALUE x, y; +{ + long f_y; + double d; + + switch (TYPE(y)) { + case T_FIXNUM: + f_y = FIX2LONG(y); + return rb_float_new(RFLOAT(x)->value / (double)f_y); + case T_BIGNUM: + d = rb_big2dbl(y); + return rb_float_new(RFLOAT(x)->value / d); + case T_FLOAT: + return rb_float_new(RFLOAT(x)->value / RFLOAT(y)->value); + default: + return rb_num_coerce_bin(x, y); + } +} + + +static void +flodivmod(x, y, divp, modp) + double x, y; + double *divp, *modp; +{ + double div, mod; + +#ifdef HAVE_FMOD + mod = fmod(x, y); +#else + { + double z; + + modf(x/y, &z); + mod = x - z * y; + } +#endif + div = (x - mod) / y; + if (y*mod < 0) { + mod += y; + div -= 1.0; + } + if (modp) *modp = mod; + if (divp) *divp = div; +} + + +/* + * call-seq: + * flt % other => float + * flt.modulo(other) => float + * + * Return the modulo after division of <code>flt</code> by <code>other</code>. + * + * 6543.21.modulo(137) #=> 104.21 + * 6543.21.modulo(137.24) #=> 92.9299999999996 + */ + +static VALUE +flo_mod(x, y) + VALUE x, y; +{ + double fy, mod; + + switch (TYPE(y)) { + case T_FIXNUM: + fy = (double)FIX2LONG(y); + break; + case T_BIGNUM: + fy = rb_big2dbl(y); + break; + case T_FLOAT: + fy = RFLOAT(y)->value; + break; + default: + return rb_num_coerce_bin(x, y); + } + flodivmod(RFLOAT(x)->value, fy, 0, &mod); + return rb_float_new(mod); +} + +/* + * call-seq: + * flt.divmod(numeric) => array + * + * See <code>Numeric#divmod</code>. 
+ */ + +static VALUE +flo_divmod(x, y) + VALUE x, y; +{ + double fy, div, mod; + volatile VALUE a, b; + + switch (TYPE(y)) { + case T_FIXNUM: + fy = (double)FIX2LONG(y); + break; + case T_BIGNUM: + fy = rb_big2dbl(y); + break; + case T_FLOAT: + fy = RFLOAT(y)->value; + break; + default: + return rb_num_coerce_bin(x, y); + } + flodivmod(RFLOAT(x)->value, fy, &div, &mod); + a = rb_float_new(div); + b = rb_float_new(mod); + return rb_assoc_new(a, b); +} + +/* + * call-seq: + * + * flt ** other => float + * + * Raises <code>float</code> the <code>other</code> power. + */ + +static VALUE +flo_pow(x, y) + VALUE x, y; +{ + switch (TYPE(y)) { + case T_FIXNUM: + return rb_float_new(pow(RFLOAT(x)->value, (double)FIX2LONG(y))); + case T_BIGNUM: + return rb_float_new(pow(RFLOAT(x)->value, rb_big2dbl(y))); + case T_FLOAT: + return rb_float_new(pow(RFLOAT(x)->value, RFLOAT(y)->value)); + default: + return rb_num_coerce_bin(x, y); + } +} + +/* + * call-seq: + * num.eql?(numeric) => true or false + * + * Returns <code>true</code> if <i>num</i> and <i>numeric</i> are the + * same type and have equal values. + * + * 1 == 1.0 #=> true + * 1.eql?(1.0) #=> false + * (1.0).eql?(1.0) #=> true + */ + +static VALUE +num_eql(x, y) + VALUE x, y; +{ + if (TYPE(x) != TYPE(y)) return Qfalse; + + return rb_equal(x, y); +} + +/* + * call-seq: + * num <=> other -> 0 or nil + * + * Returns zero if <i>num</i> equals <i>other</i>, <code>nil</code> + * otherwise. + */ + +static VALUE +num_cmp(x, y) + VALUE x, y; +{ + if (x == y) return INT2FIX(0); + return Qnil; +} + +static VALUE +num_equal(x, y) + VALUE x, y; +{ + if (x == y) return Qtrue; + return rb_funcall(y, id_eq, 1, x); +} + +/* + * call-seq: + * flt == obj => true or false + * + * Returns <code>true</code> only if <i>obj</i> has the same value + * as <i>flt</i>. Contrast this with <code>Float#eql?</code>, which + * requires <i>obj</i> to be a <code>Float</code>. 
+ * + * 1.0 == 1 #=> true + * + */ + +static VALUE +flo_eq(x, y) + VALUE x, y; +{ + volatile double a, b; + + switch (TYPE(y)) { + case T_FIXNUM: + b = FIX2LONG(y); + break; + case T_BIGNUM: + b = rb_big2dbl(y); + break; + case T_FLOAT: + b = RFLOAT(y)->value; + break; + default: + return num_equal(x, y); + } + a = RFLOAT(x)->value; + if (isnan(a) || isnan(b)) return Qfalse; + return (a == b)?Qtrue:Qfalse; +} + +/* + * call-seq: + * flt.hash => integer + * + * Returns a hash code for this float. + */ + +static VALUE +flo_hash(num) + VALUE num; +{ + double d; + char *c; + int i, hash; + + d = RFLOAT(num)->value; + if (d == 0) d = fabs(d); + c = (char*)&d; + for (hash=0, i=0; i<sizeof(double);i++) { + hash += c[i] * 971; + } + if (hash < 0) hash = -hash; + return INT2FIX(hash); +} + +VALUE +rb_dbl_cmp(a, b) + double a, b; +{ + if (isnan(a) || isnan(b)) return Qnil; + if (a == b) return INT2FIX(0); + if (a > b) return INT2FIX(1); + if (a < b) return INT2FIX(-1); + return Qnil; +} + +/* + * call-seq: + * flt <=> numeric => -1, 0, +1 + * + * Returns -1, 0, or +1 depending on whether <i>flt</i> is less than, + * equal to, or greater than <i>numeric</i>. This is the basis for the + * tests in <code>Comparable</code>. + */ + +static VALUE +flo_cmp(x, y) + VALUE x, y; +{ + double a, b; + + a = RFLOAT(x)->value; + switch (TYPE(y)) { + case T_FIXNUM: + b = (double)FIX2LONG(y); + break; + + case T_BIGNUM: + b = rb_big2dbl(y); + break; + + case T_FLOAT: + b = RFLOAT(y)->value; + break; + + default: + return rb_num_coerce_cmp(x, y); + } + return rb_dbl_cmp(a, b); +} + +/* + * call-seq: + * flt > other => true or false + * + * <code>true</code> if <code>flt</code> is greater than <code>other</code>. 
+ */ + +static VALUE +flo_gt(x, y) + VALUE x, y; +{ + double a, b; + + a = RFLOAT(x)->value; + switch (TYPE(y)) { + case T_FIXNUM: + b = (double)FIX2LONG(y); + break; + + case T_BIGNUM: + b = rb_big2dbl(y); + break; + + case T_FLOAT: + b = RFLOAT(y)->value; + break; + + default: + return rb_num_coerce_relop(x, y); + } + if (isnan(a) || isnan(b)) return Qfalse; + return (a > b)?Qtrue:Qfalse; +} + +/* + * call-seq: + * flt >= other => true or false + * + * <code>true</code> if <code>flt</code> is greater than + * or equal to <code>other</code>. + */ + +static VALUE +flo_ge(x, y) + VALUE x, y; +{ + double a, b; + + a = RFLOAT(x)->value; + switch (TYPE(y)) { + case T_FIXNUM: + b = (double)FIX2LONG(y); + break; + + case T_BIGNUM: + b = rb_big2dbl(y); + break; + + case T_FLOAT: + b = RFLOAT(y)->value; + break; + + default: + return rb_num_coerce_relop(x, y); + } + if (isnan(a) || isnan(b)) return Qfalse; + return (a >= b)?Qtrue:Qfalse; +} + +/* + * call-seq: + * flt < other => true or false + * + * <code>true</code> if <code>flt</code> is less than <code>other</code>. + */ + +static VALUE +flo_lt(x, y) + VALUE x, y; +{ + double a, b; + + a = RFLOAT(x)->value; + switch (TYPE(y)) { + case T_FIXNUM: + b = (double)FIX2LONG(y); + break; + + case T_BIGNUM: + b = rb_big2dbl(y); + break; + + case T_FLOAT: + b = RFLOAT(y)->value; + break; + + default: + return rb_num_coerce_relop(x, y); + } + if (isnan(a) || isnan(b)) return Qfalse; + return (a < b)?Qtrue:Qfalse; +} + +/* + * call-seq: + * flt <= other => true or false + * + * <code>true</code> if <code>flt</code> is less than + * or equal to <code>other</code>. 
+ */ + +static VALUE +flo_le(x, y) + VALUE x, y; +{ + double a, b; + + a = RFLOAT(x)->value; + switch (TYPE(y)) { + case T_FIXNUM: + b = (double)FIX2LONG(y); + break; + + case T_BIGNUM: + b = rb_big2dbl(y); + break; + + case T_FLOAT: + b = RFLOAT(y)->value; + break; + + default: + return rb_num_coerce_relop(x, y); + } + if (isnan(a) || isnan(b)) return Qfalse; + return (a <= b)?Qtrue:Qfalse; +} + +/* + * call-seq: + * flt.eql?(obj) => true or false + * + * Returns <code>true</code> only if <i>obj</i> is a + * <code>Float</code> with the same value as <i>flt</i>. Contrast this + * with <code>Float#==</code>, which performs type conversions. + * + * 1.0.eql?(1) #=> false + */ + +static VALUE +flo_eql(x, y) + VALUE x, y; +{ + if (TYPE(y) == T_FLOAT) { + double a = RFLOAT(x)->value; + double b = RFLOAT(y)->value; + + if (isnan(a) || isnan(b)) return Qfalse; + if (a == b) return Qtrue; + } + return Qfalse; +} + +/* + * call-seq: + * flt.to_f => flt + * + * As <code>flt</code> is already a float, returns <i>self</i>. + */ + +static VALUE +flo_to_f(num) + VALUE num; +{ + return num; +} + +/* + * call-seq: + * flt.abs => float + * + * Returns the absolute value of <i>flt</i>. + * + * (-34.56).abs #=> 34.56 + * -34.56.abs #=> 34.56 + * + */ + +static VALUE +flo_abs(flt) + VALUE flt; +{ + double val = fabs(RFLOAT(flt)->value); + return rb_float_new(val); +} + +/* + * call-seq: + * flt.zero? -> true or false + * + * Returns <code>true</code> if <i>flt</i> is 0.0. + * + */ + +static VALUE +flo_zero_p(num) + VALUE num; +{ + if (RFLOAT(num)->value == 0.0) { + return Qtrue; + } + return Qfalse; +} + +/* + * call-seq: + * flt.nan? -> true or false + * + * Returns <code>true</code> if <i>flt</i> is an invalid IEEE floating + * point number. + * + * a = -1.0 #=> -1.0 + * a.nan? #=> false + * a = 0.0/0.0 #=> NaN + * a.nan? #=> true + */ + +static VALUE +flo_is_nan_p(num) + VALUE num; +{ + double value = RFLOAT(num)->value; + + return isnan(value) ? 
Qtrue : Qfalse; +} + +/* + * call-seq: + * flt.infinite? -> nil, -1, +1 + * + * Returns <code>nil</code>, -1, or +1 depending on whether <i>flt</i> + * is finite, -infinity, or +infinity. + * + * (0.0).infinite? #=> nil + * (-1.0/0.0).infinite? #=> -1 + * (+1.0/0.0).infinite? #=> 1 + */ + +static VALUE +flo_is_infinite_p(num) + VALUE num; +{ + double value = RFLOAT(num)->value; + + if (isinf(value)) { + return INT2FIX( value < 0 ? -1 : 1 ); + } + + return Qnil; +} + +/* + * call-seq: + * flt.finite? -> true or false + * + * Returns <code>true</code> if <i>flt</i> is a valid IEEE floating + * point number (it is not infinite, and <code>nan?</code> is + * <code>false</code>). + * + */ + +static VALUE +flo_is_finite_p(num) + VALUE num; +{ + double value = RFLOAT(num)->value; + +#if HAVE_FINITE + if (!finite(value)) + return Qfalse; +#else + if (isinf(value) || isnan(value)) + return Qfalse; +#endif + + return Qtrue; +} + +/* + * call-seq: + * flt.floor => integer + * + * Returns the largest integer less than or equal to <i>flt</i>. + * + * 1.2.floor #=> 1 + * 2.0.floor #=> 2 + * (-1.2).floor #=> -2 + * (-2.0).floor #=> -2 + */ + +static VALUE +flo_floor(num) + VALUE num; +{ + double f = floor(RFLOAT(num)->value); + long val; + + if (!FIXABLE(f)) { + return rb_dbl2big(f); + } + val = f; + return LONG2FIX(val); +} + +/* + * call-seq: + * flt.ceil => integer + * + * Returns the smallest <code>Integer</code> greater than or equal to + * <i>flt</i>. + * + * 1.2.ceil #=> 2 + * 2.0.ceil #=> 2 + * (-1.2).ceil #=> -1 + * (-2.0).ceil #=> -2 + */ + +static VALUE +flo_ceil(num) + VALUE num; +{ + double f = ceil(RFLOAT(num)->value); + long val; + + if (!FIXABLE(f)) { + return rb_dbl2big(f); + } + val = f; + return LONG2FIX(val); +} + +/* + * call-seq: + * flt.round => integer + * + * Rounds <i>flt</i> to the nearest integer. 
Equivalent to: + * + * def round + * return floor(self+0.5) if self > 0.0 + * return ceil(self-0.5) if self < 0.0 + * return 0.0 + * end + * + * 1.5.round #=> 2 + * (-1.5).round #=> -2 + * + */ + +static VALUE +flo_round(num) + VALUE num; +{ + double f = RFLOAT(num)->value; + long val; + + if (f > 0.0) f = floor(f+0.5); + if (f < 0.0) f = ceil(f-0.5); + + if (!FIXABLE(f)) { + return rb_dbl2big(f); + } + val = f; + return LONG2FIX(val); +} + +/* + * call-seq: + * flt.to_i => integer + * flt.to_int => integer + * flt.truncate => integer + * + * Returns <i>flt</i> truncated to an <code>Integer</code>. + */ + +static VALUE +flo_truncate(num) + VALUE num; +{ + double f = RFLOAT(num)->value; + long val; + + if (f > 0.0) f = floor(f); + if (f < 0.0) f = ceil(f); + + if (!FIXABLE(f)) { + return rb_dbl2big(f); + } + val = f; + return LONG2FIX(val); +} + + +/* + * call-seq: + * num.floor => integer + * + * Returns the largest integer less than or equal to <i>num</i>. + * <code>Numeric</code> implements this by converting <i>anInteger</i> + * to a <code>Float</code> and invoking <code>Float#floor</code>. + * + * 1.floor #=> 1 + * (-1).floor #=> -1 + */ + +static VALUE +num_floor(num) + VALUE num; +{ + return flo_floor(rb_Float(num)); +} + + +/* + * call-seq: + * num.ceil => integer + * + * Returns the smallest <code>Integer</code> greater than or equal to + * <i>num</i>. Class <code>Numeric</code> achieves this by converting + * itself to a <code>Float</code> then invoking + * <code>Float#ceil</code>. + * + * 1.ceil #=> 1 + * 1.2.ceil #=> 2 + * (-1.2).ceil #=> -1 + * (-1.0).ceil #=> -1 + */ + +static VALUE +num_ceil(num) + VALUE num; +{ + return flo_ceil(rb_Float(num)); +} + +/* + * call-seq: + * num.round => integer + * + * Rounds <i>num</i> to the nearest integer. <code>Numeric</code> + * implements this by converting itself to a + * <code>Float</code> and invoking <code>Float#round</code>. 
+ */ + +static VALUE +num_round(num) + VALUE num; +{ + return flo_round(rb_Float(num)); +} + +/* + * call-seq: + * num.truncate => integer + * + * Returns <i>num</i> truncated to an integer. <code>Numeric</code> + * implements this by converting its value to a float and invoking + * <code>Float#truncate</code>. + */ + +static VALUE +num_truncate(num) + VALUE num; +{ + return flo_truncate(rb_Float(num)); +} + + +/* + * call-seq: + * num.step(limit, step ) {|i| block } => num + * + * Invokes <em>block</em> with the sequence of numbers starting at + * <i>num</i>, incremented by <i>step</i> on each call. The loop + * finishes when the value to be passed to the block is greater than + * <i>limit</i> (if <i>step</i> is positive) or less than + * <i>limit</i> (if <i>step</i> is negative). If all the arguments are + * integers, the loop operates using an integer counter. If any of the + * arguments are floating point numbers, all are converted to floats, + * and the loop is executed <i>floor(n + n*epsilon)+ 1</i> times, + * where <i>n = (limit - num)/step</i>. Otherwise, the loop + * starts at <i>num</i>, uses either the <code><</code> or + * <code>></code> operator to compare the counter against + * <i>limit</i>, and increments itself using the <code>+</code> + * operator. 
+ * + * 1.step(10, 2) { |i| print i, " " } + * Math::E.step(Math::PI, 0.2) { |f| print f, " " } + * + * <em>produces:</em> + * + * 1 3 5 7 9 + * 2.71828182845905 2.91828182845905 3.11828182845905 + */ + +static VALUE +num_step(argc, argv, from) + int argc; + VALUE *argv; + VALUE from; +{ + VALUE to, step; + + if (argc == 1) { + to = argv[0]; + step = INT2FIX(1); + } + else { + if (argc == 2) { + to = argv[0]; + step = argv[1]; + } + else { + rb_raise(rb_eArgError, "wrong number of arguments"); + } + if (rb_equal(step, INT2FIX(0))) { + rb_raise(rb_eArgError, "step can't be 0"); + } + } + + if (FIXNUM_P(from) && FIXNUM_P(to) && FIXNUM_P(step)) { + long i, end, diff; + + i = FIX2LONG(from); + end = FIX2LONG(to); + diff = FIX2LONG(step); + + if (diff > 0) { + while (i <= end) { + rb_yield(LONG2FIX(i)); + i += diff; + } + } + else { + while (i >= end) { + rb_yield(LONG2FIX(i)); + i += diff; + } + } + } + else if (TYPE(from) == T_FLOAT || TYPE(to) == T_FLOAT || TYPE(step) == T_FLOAT) { + const double epsilon = DBL_EPSILON; + double beg = NUM2DBL(from); + double end = NUM2DBL(to); + double unit = NUM2DBL(step); + double n = (end - beg)/unit; + double err = (fabs(beg) + fabs(end) + fabs(end-beg)) / fabs(unit) * epsilon; + long i; + + if (err>0.5) err=0.5; + n = floor(n + err) + 1; + for (i=0; i<n; i++) { + rb_yield(rb_float_new(i*unit+beg)); + } + } + else { + VALUE i = from; + ID cmp; + + if (RTEST(rb_funcall(step, '>', 1, INT2FIX(0)))) { + cmp = '>'; + } + else { + cmp = '<'; + } + for (;;) { + if (RTEST(rb_funcall(i, cmp, 1, to))) break; + rb_yield(i); + i = rb_funcall(i, '+', 1, step); + } + } + return from; +} + +long +rb_num2long(val) + VALUE val; +{ + if (NIL_P(val)) { + rb_raise(rb_eTypeError, "no implicit conversion from nil to integer"); + } + + if (FIXNUM_P(val)) return FIX2LONG(val); + + switch (TYPE(val)) { + case T_FLOAT: + if (RFLOAT(val)->value <= (double)LONG_MAX + && RFLOAT(val)->value >= (double)LONG_MIN) { + return (long)(RFLOAT(val)->value); + } + else 
{ + char buf[24]; + char *s; + + sprintf(buf, "%-.10g", RFLOAT(val)->value); + if (s = strchr(buf, ' ')) *s = '\0'; + rb_raise(rb_eRangeError, "float %s out of range of integer", buf); + } + + case T_BIGNUM: + return rb_big2long(val); + + default: + val = rb_to_int(val); + return NUM2LONG(val); + } +} + +unsigned long +rb_num2ulong(val) + VALUE val; +{ + if (TYPE(val) == T_BIGNUM) { + return rb_big2ulong(val); + } + return (unsigned long)rb_num2long(val); +} + +#if SIZEOF_INT < SIZEOF_LONG +static void +check_int(num) + long num; +{ + const char *s; + + if (num < INT_MIN) { + s = "small"; + } + else if (num > INT_MAX) { + s = "big"; + } + else { + return; + } + rb_raise(rb_eRangeError, "integer %ld too %s to convert to `int'", num, s); +} + +static void +check_uint(num) + unsigned long num; +{ + if (num > UINT_MAX) { + rb_raise(rb_eRangeError, "integer %lu too big to convert to `unsigned int'", num); + } +} + +long +rb_num2int(val) + VALUE val; +{ + long num = rb_num2long(val); + + check_int(num); + return num; +} + +long +rb_fix2int(val) + VALUE val; +{ + long num = FIXNUM_P(val)?FIX2LONG(val):rb_num2long(val); + + check_int(num); + return num; +} + +unsigned long +rb_num2uint(val) + VALUE val; +{ + unsigned long num = rb_num2ulong(val); + + if (RTEST(rb_funcall(INT2FIX(0), '<', 1, val))) { + check_uint(num); + } + return num; +} + +unsigned long +rb_fix2uint(val) + VALUE val; +{ + unsigned long num; + + if (!FIXNUM_P(val)) { + return rb_num2uint(val); + } + num = FIX2ULONG(val); + if (FIX2LONG(val) > 0) { + check_uint(num); + } + return num; +} +#else +long +rb_num2int(val) + VALUE val; +{ + return rb_num2long(val); +} + +long +rb_fix2int(val) + VALUE val; +{ + return FIX2INT(val); +} +#endif + +VALUE +rb_num2fix(val) + VALUE val; +{ + long v; + + if (FIXNUM_P(val)) return val; + + v = rb_num2long(val); + if (!FIXABLE(v)) + rb_raise(rb_eRangeError, "integer %ld out of range of fixnum", v); + return LONG2FIX(v); +} + +#if HAVE_LONG_LONG + +LONG_LONG 
+rb_num2ll(val) + VALUE val; +{ + if (NIL_P(val)) { + rb_raise(rb_eTypeError, "no implicit conversion from nil"); + } + + if (FIXNUM_P(val)) return (LONG_LONG)FIX2LONG(val); + + switch (TYPE(val)) { + case T_FLOAT: + if (RFLOAT(val)->value <= (double)LLONG_MAX + && RFLOAT(val)->value >= (double)LLONG_MIN) { + return (LONG_LONG)(RFLOAT(val)->value); + } + else { + char buf[24]; + char *s; + + sprintf(buf, "%-.10g", RFLOAT(val)->value); + if (s = strchr(buf, ' ')) *s = '\0'; + rb_raise(rb_eRangeError, "float %s out of range of long long", buf); + } + + case T_BIGNUM: + return rb_big2ll(val); + + case T_STRING: + rb_raise(rb_eTypeError, "no implicit conversion from string"); + return Qnil; /* not reached */ + + case T_TRUE: + case T_FALSE: + rb_raise(rb_eTypeError, "no implicit conversion from boolean"); + return Qnil; /* not reached */ + + default: + val = rb_to_int(val); + return NUM2LL(val); + } +} + +unsigned LONG_LONG +rb_num2ull(val) + VALUE val; +{ + if (TYPE(val) == T_BIGNUM) { + return rb_big2ull(val); + } + return (unsigned LONG_LONG)rb_num2ll(val); +} + +#endif /* HAVE_LONG_LONG */ + + +/* + * Document-class: Integer + * + * <code>Integer</code> is the basis for the two concrete classes that + * hold whole numbers, <code>Bignum</code> and <code>Fixnum</code>. + * + */ + + +/* + * call-seq: + * int.to_i => int + * int.to_int => int + * int.floor => int + * int.ceil => int + * int.round => int + * int.truncate => int + * + * As <i>int</i> is already an <code>Integer</code>, all these + * methods simply return the receiver. + */ + +static VALUE +int_to_i(num) + VALUE num; +{ + return num; +} + +/* + * call-seq: + * int.integer? -> true + * + * Always returns <code>true</code>. + */ + +static VALUE +int_int_p(num) + VALUE num; +{ + return Qtrue; +} + +/* + * call-seq: + * int.next => integer + * int.succ => integer + * + * Returns the <code>Integer</code> equal to <i>int</i> + 1. 
+ * + * 1.next #=> 2 + * (-1).next #=> 0 + */ + +static VALUE +int_succ(num) + VALUE num; +{ + if (FIXNUM_P(num)) { + long i = FIX2LONG(num) + 1; + return LONG2NUM(i); + } + return rb_funcall(num, '+', 1, INT2FIX(1)); +} + +/* + * call-seq: + * int.chr => string + * + * Returns a string containing the ASCII character represented by the + * receiver's value. + * + * 65.chr #=> "A" + * ?a.chr #=> "a" + * 230.chr #=> "\346" + */ + +static VALUE +int_chr(num) + VALUE num; +{ + char c; + long i = NUM2LONG(num); + + if (i < 0 || 0xff < i) + rb_raise(rb_eRangeError, "%ld out of char range", i); + c = i; + return rb_str_new(&c, 1); +} + +/******************************************************************** + * + * Document-class: Fixnum + * + * A <code>Fixnum</code> holds <code>Integer</code> values that can be + * represented in a native machine word (minus 1 bit). If any operation + * on a <code>Fixnum</code> exceeds this range, the value is + * automatically converted to a <code>Bignum</code>. + * + * <code>Fixnum</code> objects have immediate value. This means that + * when they are assigned or passed as parameters, the actual object is + * passed, rather than a reference to that object. Assignment does not + * alias <code>Fixnum</code> objects. There is effectively only one + * <code>Fixnum</code> object instance for any given integer value, so, + * for example, you cannot add a singleton method to a + * <code>Fixnum</code>. + */ + + +/* + * call-seq: + * Fixnum.induced_from(obj) => fixnum + * + * Convert <code>obj</code> to a Fixnum. Works with numeric parameters. + * Also works with Symbols, but this is deprecated. + */ + +static VALUE +rb_fix_induced_from(klass, x) + VALUE klass, x; +{ + return rb_num2fix(x); +} + +/* + * call-seq: + * Integer.induced_from(obj) => fixnum, bignum + * + * Convert <code>obj</code> to an Integer. 
+ */ + +static VALUE +rb_int_induced_from(klass, x) + VALUE klass, x; +{ + switch (TYPE(x)) { + case T_FIXNUM: + case T_BIGNUM: + return x; + case T_FLOAT: + return rb_funcall(x, id_to_i, 0); + default: + rb_raise(rb_eTypeError, "failed to convert %s into Integer", + rb_obj_classname(x)); + } +} + +/* + * call-seq: + * Float.induced_from(obj) => float + * + * Convert <code>obj</code> to a float. + */ + +static VALUE +rb_flo_induced_from(klass, x) + VALUE klass, x; +{ + switch (TYPE(x)) { + case T_FIXNUM: + case T_BIGNUM: + return rb_funcall(x, rb_intern("to_f"), 0); + case T_FLOAT: + return x; + default: + rb_raise(rb_eTypeError, "failed to convert %s into Float", + rb_obj_classname(x)); + } +} + +/* + * call-seq: + * -fix => integer + * + * Negates <code>fix</code> (which might return a Bignum). + */ + +static VALUE +fix_uminus(num) + VALUE num; +{ + return LONG2NUM(-FIX2LONG(num)); +} + +VALUE +rb_fix2str(x, base) + VALUE x; + int base; +{ + extern const char ruby_digitmap[]; + char buf[SIZEOF_LONG*CHAR_BIT + 2], *b = buf + sizeof buf; + long val = FIX2LONG(x); + int neg = 0; + + if (base < 2 || 36 < base) { + rb_raise(rb_eArgError, "illegal radix %d", base); + } + if (val == 0) { + return rb_str_new2("0"); + } + if (val < 0) { + val = -val; + neg = 1; + } + *--b = '\0'; + do { + *--b = ruby_digitmap[(int)(val % base)]; + } while (val /= base); + if (neg) { + *--b = '-'; + } + + return rb_str_new2(b); +} + +/* + * call-seq: + * fix.to_s( base=10 ) -> aString + * + * Returns a string containing the representation of <i>fix</i> radix + * <i>base</i> (between 2 and 36). 
+ * + * 12345.to_s #=> "12345" + * 12345.to_s(2) #=> "11000000111001" + * 12345.to_s(8) #=> "30071" + * 12345.to_s(10) #=> "12345" + * 12345.to_s(16) #=> "3039" + * 12345.to_s(36) #=> "9ix" + * + */ +static VALUE +fix_to_s(argc, argv, x) + int argc; + VALUE *argv; + VALUE x; +{ + VALUE b; + int base; + + rb_scan_args(argc, argv, "01", &b); + if (argc == 0) base = 10; + else base = NUM2INT(b); + + if (base == 2) { + /* rb_fix2str() does not handle binary */ + return rb_big2str(rb_int2big(FIX2INT(x)), 2); + } + return rb_fix2str(x, base); +} + +/* + * call-seq: + * fix + numeric => numeric_result + * + * Performs addition: the class of the resulting object depends on + * the class of <code>numeric</code> and on the magnitude of the + * result. + */ + +static VALUE +fix_plus(x, y) + VALUE x, y; +{ + if (FIXNUM_P(y)) { + long a, b, c; + VALUE r; + + a = FIX2LONG(x); + b = FIX2LONG(y); + c = a + b; + r = LONG2FIX(c); + + if (FIX2LONG(r) != c) { + r = rb_big_plus(rb_int2big(a), rb_int2big(b)); + } + return r; + } + if (TYPE(y) == T_FLOAT) { + return rb_float_new((double)FIX2LONG(x) + RFLOAT(y)->value); + } + return rb_num_coerce_bin(x, y); +} + +/* + * call-seq: + * fix - numeric => numeric_result + * + * Performs subtraction: the class of the resulting object depends on + * the class of <code>numeric</code> and on the magnitude of the + * result. + */ + +static VALUE +fix_minus(x, y) + VALUE x, y; +{ + if (FIXNUM_P(y)) { + long a, b, c; + VALUE r; + + a = FIX2LONG(x); + b = FIX2LONG(y); + c = a - b; + r = LONG2FIX(c); + + if (FIX2LONG(r) != c) { + r = rb_big_minus(rb_int2big(a), rb_int2big(b)); + } + return r; + } + if (TYPE(y) == T_FLOAT) { + return rb_float_new((double)FIX2LONG(x) - RFLOAT(y)->value); + } + return rb_num_coerce_bin(x, y); +} + +/* + * call-seq: + * fix * numeric => numeric_result + * + * Performs multiplication: the class of the resulting object depends on + * the class of <code>numeric</code> and on the magnitude of the + * result. 
+ */ + +static VALUE +fix_mul(x, y) + VALUE x, y; +{ + if (FIXNUM_P(y)) { + long a, b, c; + VALUE r; + + a = FIX2LONG(x); + if (a == 0) return x; + + b = FIX2LONG(y); + c = a * b; + r = LONG2FIX(c); + + if (FIX2LONG(r) != c || c/a != b) { + r = rb_big_mul(rb_int2big(a), rb_int2big(b)); + } + return r; + } + if (TYPE(y) == T_FLOAT) { + return rb_float_new((double)FIX2LONG(x) * RFLOAT(y)->value); + } + return rb_num_coerce_bin(x, y); +} + +static void +fixdivmod(x, y, divp, modp) + long x, y; + long *divp, *modp; +{ + long div, mod; + + if (y == 0) rb_num_zerodiv(); + if (y < 0) { + if (x < 0) + div = -x / -y; + else + div = - (x / -y); + } + else { + if (x < 0) + div = - (-x / y); + else + div = x / y; + } + mod = x - div*y; + if ((mod < 0 && y > 0) || (mod > 0 && y < 0)) { + mod += y; + div -= 1; + } + if (divp) *divp = div; + if (modp) *modp = mod; +} + +/* + * call-seq: + * fix.quo(numeric) => float + * + * Returns the floating point result of dividing <i>fix</i> by + * <i>numeric</i>. + * + * 654321.quo(13731) #=> 47.6528293642124 + * 654321.quo(13731.24) #=> 47.6519964693647 + * + */ + +static VALUE +fix_quo(x, y) + VALUE x, y; +{ + if (FIXNUM_P(y)) { + return rb_float_new((double)FIX2LONG(x) / (double)FIX2LONG(y)); + } + return rb_num_coerce_bin(x, y); +} + +/* + * call-seq: + * fix / numeric => numeric_result + * fix.div(numeric) => numeric_result + * + * Performs division: the class of the resulting object depends on + * the class of <code>numeric</code> and on the magnitude of the + * result. + */ + +static VALUE +fix_div(x, y) + VALUE x, y; +{ + if (FIXNUM_P(y)) { + long div; + + fixdivmod(FIX2LONG(x), FIX2LONG(y), &div, 0); + return LONG2NUM(div); + } + return rb_num_coerce_bin(x, y); +} + +/* + * call-seq: + * fix % other => Numeric + * fix.modulo(other) => Numeric + * + * Returns <code>fix</code> modulo <code>other</code>. + * See <code>Numeric.divmod</code> for more information. 
+ */ + +static VALUE +fix_mod(x, y) + VALUE x, y; +{ + if (FIXNUM_P(y)) { + long mod; + + fixdivmod(FIX2LONG(x), FIX2LONG(y), 0, &mod); + return LONG2NUM(mod); + } + return rb_num_coerce_bin(x, y); +} + +/* + * call-seq: + * fix.divmod(numeric) => array + * + * See <code>Numeric#divmod</code>. + */ +static VALUE +fix_divmod(x, y) + VALUE x, y; +{ + if (FIXNUM_P(y)) { + long div, mod; + + fixdivmod(FIX2LONG(x), FIX2LONG(y), &div, &mod); + + return rb_assoc_new(LONG2NUM(div), LONG2NUM(mod)); + } + return rb_num_coerce_bin(x, y); +} + +/* + * call-seq: + * fix ** other => Numeric + * + * Raises <code>fix</code> to the <code>other</code> power, which may + * be negative or fractional. + * + * 2 ** 3 #=> 8 + * 2 ** -1 #=> 0.5 + * 2 ** 0.5 #=> 1.4142135623731 + */ + +static VALUE +fix_pow(x, y) + VALUE x, y; +{ + if (FIXNUM_P(y)) { + long a, b; + + b = FIX2LONG(y); + if (b == 0) return INT2FIX(1); + if (b == 1) return x; + a = FIX2LONG(x); + if (b > 0) { + return rb_big_pow(rb_int2big(a), y); + } + return rb_float_new(pow((double)a, (double)b)); + } + return rb_num_coerce_bin(x, y); +} + +/* + * call-seq: + * fix == other + * + * Return <code>true</code> if <code>fix</code> equals <code>other</code> + * numerically. + * + * 1 == 2 #=> false + * 1 == 1.0 #=> true + */ + +static VALUE +fix_equal(x, y) + VALUE x, y; +{ + if (FIXNUM_P(y)) { + return (FIX2LONG(x) == FIX2LONG(y))?Qtrue:Qfalse; + } + else { + return num_equal(x, y); + } +} + +/* + * call-seq: + * fix <=> numeric => -1, 0, +1 + * + * Comparison---Returns -1, 0, or +1 depending on whether <i>fix</i> is + * less than, equal to, or greater than <i>numeric</i>. This is the + * basis for the tests in <code>Comparable</code>. 
+ */ + +static VALUE +fix_cmp(x, y) + VALUE x, y; +{ + if (FIXNUM_P(y)) { + long a = FIX2LONG(x), b = FIX2LONG(y); + + if (a == b) return INT2FIX(0); + if (a > b) return INT2FIX(1); + return INT2FIX(-1); + } + else { + return rb_num_coerce_cmp(x, y); + } +} + +/* + * call-seq: + * fix > other => true or false + * + * Returns <code>true</code> if the value of <code>fix</code> is + * greater than that of <code>other</code>. + */ + +static VALUE +fix_gt(x, y) + VALUE x, y; +{ + if (FIXNUM_P(y)) { + long a = FIX2LONG(x), b = FIX2LONG(y); + + if (a > b) return Qtrue; + return Qfalse; + } + else { + return rb_num_coerce_relop(x, y); + } +} + +/* + * call-seq: + * fix >= other => true or false + * + * Returns <code>true</code> if the value of <code>fix</code> is + * greater than or equal to that of <code>other</code>. + */ + +static VALUE +fix_ge(x, y) + VALUE x, y; +{ + if (FIXNUM_P(y)) { + long a = FIX2LONG(x), b = FIX2LONG(y); + + if (a >= b) return Qtrue; + return Qfalse; + } + else { + return rb_num_coerce_relop(x, y); + } +} + +/* + * call-seq: + * fix < other => true or false + * + * Returns <code>true</code> if the value of <code>fix</code> is + * less than that of <code>other</code>. + */ + +static VALUE +fix_lt(x, y) + VALUE x, y; +{ + if (FIXNUM_P(y)) { + long a = FIX2LONG(x), b = FIX2LONG(y); + + if (a < b) return Qtrue; + return Qfalse; + } + else { + return rb_num_coerce_relop(x, y); + } +} + +/* + * call-seq: + * fix <= other => true or false + * + * Returns <code>true</code> if the value of <code>fix</code> is + * less thanor equal to that of <code>other</code>. + */ + +static VALUE +fix_le(x, y) + VALUE x, y; +{ + if (FIXNUM_P(y)) { + long a = FIX2LONG(x), b = FIX2LONG(y); + + if (a <= b) return Qtrue; + return Qfalse; + } + else { + return rb_num_coerce_relop(x, y); + } +} + +/* + * call-seq: + * ~fix => integer + * + * One's complement: returns a number where each bit is flipped. 
+ */ + +static VALUE +fix_rev(num) + VALUE num; +{ + long val = FIX2LONG(num); + + val = ~val; + return LONG2NUM(val); +} + +/* + * call-seq: + * fix & other => integer + * + * Bitwise AND. + */ + +static VALUE +fix_and(x, y) + VALUE x, y; +{ + long val; + + if (TYPE(y) == T_BIGNUM) { + return rb_big_and(y, x); + } + val = FIX2LONG(x) & NUM2LONG(y); + return LONG2NUM(val); +} + +/* + * call-seq: + * fix | other => integer + * + * Bitwise OR. + */ + +static VALUE +fix_or(x, y) + VALUE x, y; +{ + long val; + + if (TYPE(y) == T_BIGNUM) { + return rb_big_or(y, x); + } + val = FIX2LONG(x) | NUM2LONG(y); + return LONG2NUM(val); +} + +/* + * call-seq: + * fix ^ other => integer + * + * Bitwise EXCLUSIVE OR. + */ + +static VALUE +fix_xor(x, y) + VALUE x, y; +{ + long val; + + if (TYPE(y) == T_BIGNUM) { + return rb_big_xor(y, x); + } + val = FIX2LONG(x) ^ NUM2LONG(y); + return LONG2NUM(val); +} + +static VALUE fix_rshift _((VALUE, VALUE)); + +/* + * call-seq: + * fix << count => integer + * + * Shifts _fix_ left _count_ positions (right if _count_ is negative). + */ + +static VALUE +fix_lshift(x, y) + VALUE x, y; +{ + long val, width; + + val = NUM2LONG(x); + width = NUM2LONG(y); + if (width < 0) + return fix_rshift(x, LONG2FIX(-width)); + if (width > (sizeof(VALUE)*CHAR_BIT-1) + || ((unsigned long)val)>>(sizeof(VALUE)*CHAR_BIT-1-width) > 0) { + return rb_big_lshift(rb_int2big(val), y); + } + val = val << width; + return LONG2NUM(val); +} + +/* + * call-seq: + * fix >> count => integer + * + * Shifts _fix_ left _count_ positions (right if _count_ is negative). 
+ */ + +static VALUE +fix_rshift(x, y) + VALUE x, y; +{ + long i, val; + + i = NUM2LONG(y); + if (i < 0) + return fix_lshift(x, LONG2FIX(-i)); + if (i == 0) return x; + val = FIX2LONG(x); + if (i >= sizeof(long)*CHAR_BIT-1) { + if (val < 0) return INT2FIX(-1); + return INT2FIX(0); + } + val = RSHIFT(val, i); + return LONG2FIX(val); +} + +/* + * call-seq: + * fix[n] => 0, 1 + * + * Bit Reference---Returns the <em>n</em>th bit in the binary + * representation of <i>fix</i>, where <i>fix</i>[0] is the least + * significant bit. + * + * a = 0b11001100101010 + * 30.downto(0) do |n| print a[n] end + * + * <em>produces:</em> + * + * 0000000000000000011001100101010 + */ + +static VALUE +fix_aref(fix, idx) + VALUE fix, idx; +{ + long val = FIX2LONG(fix); + long i; + + if (TYPE(idx) == T_BIGNUM) { + idx = rb_big_norm(idx); + if (!FIXNUM_P(idx)) { + if (!RBIGNUM(idx)->sign || val >= 0) + return INT2FIX(0); + return INT2FIX(1); + } + } + i = NUM2LONG(idx); + + if (i < 0) return INT2FIX(0); + if (sizeof(VALUE)*CHAR_BIT-1 < i) { + if (val < 0) return INT2FIX(1); + return INT2FIX(0); + } + if (val & (1L<<i)) + return INT2FIX(1); + return INT2FIX(0); +} + +/* + * call-seq: + * fix.to_f -> float + * + * Converts <i>fix</i> to a <code>Float</code>. + * + */ + +static VALUE +fix_to_f(num) + VALUE num; +{ + double val; + + val = (double)FIX2LONG(num); + + return rb_float_new(val); +} + +/* + * call-seq: + * fix.abs -> aFixnum + * + * Returns the absolute value of <i>fix</i>. + * + * -12345.abs #=> 12345 + * 12345.abs #=> 12345 + * + */ + +static VALUE +fix_abs(fix) + VALUE fix; +{ + long i = FIX2LONG(fix); + + if (i < 0) i = -i; + + return LONG2NUM(i); +} + +/* + * call-seq: + * fix.id2name -> string or nil + * + * Returns the name of the object whose symbol id is <i>fix</i>. If + * there is no symbol in the symbol table with this value, returns + * <code>nil</code>. <code>id2name</code> has nothing to do with the + * <code>Object.id</code> method. 
See also <code>Fixnum#to_sym</code>, + * <code>String#intern</code>, and class <code>Symbol</code>. + * + * symbol = :@inst_var #=> :@inst_var + * id = symbol.to_i #=> 9818 + * id.id2name #=> "@inst_var" + */ + +static VALUE +fix_id2name(fix) + VALUE fix; +{ + char *name = rb_id2name(FIX2UINT(fix)); + if (name) return rb_str_new2(name); + return Qnil; +} + + +/* + * call-seq: + * fix.to_sym -> aSymbol + * + * Returns the symbol whose integer value is <i>fix</i>. See also + * <code>Fixnum#id2name</code>. + * + * fred = :fred.to_i + * fred.id2name #=> "fred" + * fred.to_sym #=> :fred + */ + +static VALUE +fix_to_sym(fix) + VALUE fix; +{ + ID id = FIX2UINT(fix); + + if (rb_id2name(id)) { + return ID2SYM(id); + } + return Qnil; +} + + +/* + * call-seq: + * fix.size -> fixnum + * + * Returns the number of <em>bytes</em> in the machine representation + * of a <code>Fixnum</code>. + * + * 1.size #=> 4 + * -1.size #=> 4 + * 2147483647.size #=> 4 + */ + +static VALUE +fix_size(fix) + VALUE fix; +{ + return INT2FIX(sizeof(long)); +} + +/* + * call-seq: + * int.upto(limit) {|i| block } => int + * + * Iterates <em>block</em>, passing in integer values from <i>int</i> + * up to and including <i>limit</i>. + * + * 5.upto(10) { |i| print i, " " } + * + * <em>produces:</em> + * + * 5 6 7 8 9 10 + */ + +static VALUE +int_upto(from, to) + VALUE from, to; +{ + if (FIXNUM_P(from) && FIXNUM_P(to)) { + long i, end; + + end = FIX2LONG(to); + for (i = FIX2LONG(from); i <= end; i++) { + rb_yield(LONG2FIX(i)); + } + } + else { + VALUE i = from, c; + + while (!(c = rb_funcall(i, '>', 1, to))) { + rb_yield(i); + i = rb_funcall(i, '+', 1, INT2FIX(1)); + } + if (NIL_P(c)) rb_cmperr(i, to); + } + return from; +} + +/* + * call-seq: + * int.downto(limit) {|i| block } => int + * + * Iterates <em>block</em>, passing decreasing values from <i>int</i> + * down to and including <i>limit</i>. + * + * 5.downto(1) { |n| print n, ".. " } + * print " Liftoff!\n" + * + * <em>produces:</em> + * + * 5.. 4.. 
3.. 2.. 1.. Liftoff! + */ + +static VALUE +int_downto(from, to) + VALUE from, to; +{ + if (FIXNUM_P(from) && FIXNUM_P(to)) { + long i, end; + + end = FIX2LONG(to); + for (i=FIX2LONG(from); i >= end; i--) { + rb_yield(LONG2FIX(i)); + } + } + else { + VALUE i = from, c; + + while (!(c = rb_funcall(i, '<', 1, to))) { + rb_yield(i); + i = rb_funcall(i, '-', 1, INT2FIX(1)); + } + if (NIL_P(c)) rb_cmperr(i, to); + } + return from; +} + +/* + * call-seq: + * int.times {|i| block } => int + * + * Iterates block <i>int</i> times, passing in values from zero to + * <i>int</i> - 1. + * + * 5.times do |i| + * print i, " " + * end + * + * <em>produces:</em> + * + * 0 1 2 3 4 + */ + +static VALUE +int_dotimes(num) + VALUE num; +{ + if (FIXNUM_P(num)) { + long i, end; + + end = FIX2LONG(num); + for (i=0; i<end; i++) { + rb_yield(LONG2FIX(i)); + } + } + else { + VALUE i = INT2FIX(0); + + for (;;) { + if (!RTEST(rb_funcall(i, '<', 1, num))) break; + rb_yield(i); + i = rb_funcall(i, '+', 1, INT2FIX(1)); + } + } + return num; +} + +/* + * call-seq: + * fix.zero? => true or false + * + * Returns <code>true</code> if <i>fix</i> is zero. + * + */ + +static VALUE +fix_zero_p(num) + VALUE num; +{ + if (FIX2LONG(num) == 0) { + return Qtrue; + } + return Qfalse; +} + +void +Init_Numeric() +{ +#if defined(__FreeBSD__) && __FreeBSD__ < 4 + /* allow divide by zero -- Inf */ + fpsetmask(fpgetmask() & ~(FP_X_DZ|FP_X_INV|FP_X_OFL)); +#elif defined(_UNICOSMP) + /* Turn off floating point exceptions for divide by zero, etc. */ + _set_Creg(0, 0); +#elif defined(__BORLANDC__) + /* Turn off floating point exceptions for overflow, etc. 
*/ + _control87(MCW_EM, MCW_EM); +#endif + id_coerce = rb_intern("coerce"); + id_to_i = rb_intern("to_i"); + id_eq = rb_intern("=="); + + rb_eZeroDivError = rb_define_class("ZeroDivisionError", rb_eStandardError); + rb_eFloatDomainError = rb_define_class("FloatDomainError", rb_eRangeError); + rb_cNumeric = rb_define_class("Numeric", rb_cObject); + + rb_define_method(rb_cNumeric, "singleton_method_added", num_sadded, 1); + rb_include_module(rb_cNumeric, rb_mComparable); + rb_define_method(rb_cNumeric, "initialize_copy", num_init_copy, 1); + rb_define_method(rb_cNumeric, "coerce", num_coerce, 1); + + rb_define_method(rb_cNumeric, "+@", num_uplus, 0); + rb_define_method(rb_cNumeric, "-@", num_uminus, 0); + rb_define_method(rb_cNumeric, "<=>", num_cmp, 1); + rb_define_method(rb_cNumeric, "eql?", num_eql, 1); + rb_define_method(rb_cNumeric, "quo", num_quo, 1); + rb_define_method(rb_cNumeric, "div", num_div, 1); + rb_define_method(rb_cNumeric, "divmod", num_divmod, 1); + rb_define_method(rb_cNumeric, "modulo", num_modulo, 1); + rb_define_method(rb_cNumeric, "remainder", num_remainder, 1); + rb_define_method(rb_cNumeric, "abs", num_abs, 0); + rb_define_method(rb_cNumeric, "to_int", num_to_int, 0); + + rb_define_method(rb_cNumeric, "integer?", num_int_p, 0); + rb_define_method(rb_cNumeric, "zero?", num_zero_p, 0); + rb_define_method(rb_cNumeric, "nonzero?", num_nonzero_p, 0); + + rb_define_method(rb_cNumeric, "floor", num_floor, 0); + rb_define_method(rb_cNumeric, "ceil", num_ceil, 0); + rb_define_method(rb_cNumeric, "round", num_round, 0); + rb_define_method(rb_cNumeric, "truncate", num_truncate, 0); + rb_define_method(rb_cNumeric, "step", num_step, -1); + + rb_cInteger = rb_define_class("Integer", rb_cNumeric); + rb_undef_alloc_func(rb_cInteger); + rb_undef_method(CLASS_OF(rb_cInteger), "new"); + + rb_define_method(rb_cInteger, "integer?", int_int_p, 0); + rb_define_method(rb_cInteger, "upto", int_upto, 1); + rb_define_method(rb_cInteger, "downto", int_downto, 1); + 
rb_define_method(rb_cInteger, "times", int_dotimes, 0); + rb_include_module(rb_cInteger, rb_mPrecision); + rb_define_method(rb_cInteger, "succ", int_succ, 0); + rb_define_method(rb_cInteger, "next", int_succ, 0); + rb_define_method(rb_cInteger, "chr", int_chr, 0); + rb_define_method(rb_cInteger, "to_i", int_to_i, 0); + rb_define_method(rb_cInteger, "to_int", int_to_i, 0); + rb_define_method(rb_cInteger, "floor", int_to_i, 0); + rb_define_method(rb_cInteger, "ceil", int_to_i, 0); + rb_define_method(rb_cInteger, "round", int_to_i, 0); + rb_define_method(rb_cInteger, "truncate", int_to_i, 0); + + rb_cFixnum = rb_define_class("Fixnum", rb_cInteger); + rb_include_module(rb_cFixnum, rb_mPrecision); + rb_define_singleton_method(rb_cFixnum, "induced_from", rb_fix_induced_from, 1); + rb_define_singleton_method(rb_cInteger, "induced_from", rb_int_induced_from, 1); + + rb_define_method(rb_cFixnum, "to_s", fix_to_s, -1); + + rb_define_method(rb_cFixnum, "id2name", fix_id2name, 0); + rb_define_method(rb_cFixnum, "to_sym", fix_to_sym, 0); + + rb_define_method(rb_cFixnum, "-@", fix_uminus, 0); + rb_define_method(rb_cFixnum, "+", fix_plus, 1); + rb_define_method(rb_cFixnum, "-", fix_minus, 1); + rb_define_method(rb_cFixnum, "*", fix_mul, 1); + rb_define_method(rb_cFixnum, "/", fix_div, 1); + rb_define_method(rb_cFixnum, "div", fix_div, 1); + rb_define_method(rb_cFixnum, "%", fix_mod, 1); + rb_define_method(rb_cFixnum, "modulo", fix_mod, 1); + rb_define_method(rb_cFixnum, "divmod", fix_divmod, 1); + rb_define_method(rb_cFixnum, "quo", fix_quo, 1); + rb_define_method(rb_cFixnum, "**", fix_pow, 1); + + rb_define_method(rb_cFixnum, "abs", fix_abs, 0); + + rb_define_method(rb_cFixnum, "==", fix_equal, 1); + rb_define_method(rb_cFixnum, "<=>", fix_cmp, 1); + rb_define_method(rb_cFixnum, ">", fix_gt, 1); + rb_define_method(rb_cFixnum, ">=", fix_ge, 1); + rb_define_method(rb_cFixnum, "<", fix_lt, 1); + rb_define_method(rb_cFixnum, "<=", fix_le, 1); + + rb_define_method(rb_cFixnum, "~", 
fix_rev, 0); + rb_define_method(rb_cFixnum, "&", fix_and, 1); + rb_define_method(rb_cFixnum, "|", fix_or, 1); + rb_define_method(rb_cFixnum, "^", fix_xor, 1); + rb_define_method(rb_cFixnum, "[]", fix_aref, 1); + + rb_define_method(rb_cFixnum, "<<", fix_lshift, 1); + rb_define_method(rb_cFixnum, ">>", fix_rshift, 1); + + rb_define_method(rb_cFixnum, "to_f", fix_to_f, 0); + rb_define_method(rb_cFixnum, "size", fix_size, 0); + rb_define_method(rb_cFixnum, "zero?", fix_zero_p, 0); + + rb_cFloat = rb_define_class("Float", rb_cNumeric); + + rb_undef_alloc_func(rb_cFloat); + rb_undef_method(CLASS_OF(rb_cFloat), "new"); + + rb_define_singleton_method(rb_cFloat, "induced_from", rb_flo_induced_from, 1); + rb_include_module(rb_cFloat, rb_mPrecision); + + rb_define_const(rb_cFloat, "ROUNDS", INT2FIX(FLT_ROUNDS)); + rb_define_const(rb_cFloat, "RADIX", INT2FIX(FLT_RADIX)); + rb_define_const(rb_cFloat, "MANT_DIG", INT2FIX(DBL_MANT_DIG)); + rb_define_const(rb_cFloat, "DIG", INT2FIX(DBL_DIG)); + rb_define_const(rb_cFloat, "MIN_EXP", INT2FIX(DBL_MIN_EXP)); + rb_define_const(rb_cFloat, "MAX_EXP", INT2FIX(DBL_MAX_EXP)); + rb_define_const(rb_cFloat, "MIN_10_EXP", INT2FIX(DBL_MIN_10_EXP)); + rb_define_const(rb_cFloat, "MAX_10_EXP", INT2FIX(DBL_MAX_10_EXP)); + rb_define_const(rb_cFloat, "MIN", rb_float_new(DBL_MIN)); + rb_define_const(rb_cFloat, "MAX", rb_float_new(DBL_MAX)); + rb_define_const(rb_cFloat, "EPSILON", rb_float_new(DBL_EPSILON)); + + rb_define_method(rb_cFloat, "to_s", flo_to_s, 0); + rb_define_method(rb_cFloat, "coerce", flo_coerce, 1); + rb_define_method(rb_cFloat, "-@", flo_uminus, 0); + rb_define_method(rb_cFloat, "+", flo_plus, 1); + rb_define_method(rb_cFloat, "-", flo_minus, 1); + rb_define_method(rb_cFloat, "*", flo_mul, 1); + rb_define_method(rb_cFloat, "/", flo_div, 1); + rb_define_method(rb_cFloat, "%", flo_mod, 1); + rb_define_method(rb_cFloat, "modulo", flo_mod, 1); + rb_define_method(rb_cFloat, "divmod", flo_divmod, 1); + rb_define_method(rb_cFloat, "**", 
flo_pow, 1); + rb_define_method(rb_cFloat, "==", flo_eq, 1); + rb_define_method(rb_cFloat, "<=>", flo_cmp, 1); + rb_define_method(rb_cFloat, ">", flo_gt, 1); + rb_define_method(rb_cFloat, ">=", flo_ge, 1); + rb_define_method(rb_cFloat, "<", flo_lt, 1); + rb_define_method(rb_cFloat, "<=", flo_le, 1); + rb_define_method(rb_cFloat, "eql?", flo_eql, 1); + rb_define_method(rb_cFloat, "hash", flo_hash, 0); + rb_define_method(rb_cFloat, "to_f", flo_to_f, 0); + rb_define_method(rb_cFloat, "abs", flo_abs, 0); + rb_define_method(rb_cFloat, "zero?", flo_zero_p, 0); + + rb_define_method(rb_cFloat, "to_i", flo_truncate, 0); + rb_define_method(rb_cFloat, "to_int", flo_truncate, 0); + rb_define_method(rb_cFloat, "floor", flo_floor, 0); + rb_define_method(rb_cFloat, "ceil", flo_ceil, 0); + rb_define_method(rb_cFloat, "round", flo_round, 0); + rb_define_method(rb_cFloat, "truncate", flo_truncate, 0); + + rb_define_method(rb_cFloat, "nan?", flo_is_nan_p, 0); + rb_define_method(rb_cFloat, "infinite?", flo_is_infinite_p, 0); + rb_define_method(rb_cFloat, "finite?", flo_is_finite_p, 0); +} +/********************************************************************** + + object.c - + + $Author: matz $ + $Date: 2005/03/16 09:25:44 $ + created at: Thu Jul 15 12:01:24 JST 1993 + + Copyright (C) 1993-2003 Yukihiro Matsumoto + Copyright (C) 2000 Network Applied Communication Laboratory, Inc. 
+ Copyright (C) 2000 Information-technology Promotion Agency, Japan + +**********************************************************************/ + +#include "ruby.h" +#include "st.h" +#include "util.h" +#include <stdio.h> +#include <errno.h> +#include <ctype.h> +#include <math.h> + +VALUE rb_mKernel; +VALUE rb_cObject; +VALUE rb_cModule; +VALUE rb_cClass; +VALUE rb_cData; + +VALUE rb_cNilClass; +VALUE rb_cTrueClass; +VALUE rb_cFalseClass; +VALUE rb_cSymbol; + +static ID id_eq, id_eql, id_inspect, id_init_copy; + +/* + * call-seq: + * obj === other => true or false + * + * Case Equality---For class <code>Object</code>, effectively the same + * as calling <code>#==</code>, but typically overridden by descendents + * to provide meaningful semantics in <code>case</code> statements. + */ + +VALUE +rb_equal(obj1, obj2) + VALUE obj1, obj2; +{ + VALUE result; + + if (obj1 == obj2) return Qtrue; + result = rb_funcall(obj1, id_eq, 1, obj2); + if (RTEST(result)) return Qtrue; + return Qfalse; +} + +int +rb_eql(obj1, obj2) + VALUE obj1, obj2; +{ + return RTEST(rb_funcall(obj1, id_eql, 1, obj2)); +} + +/* + * call-seq: + * obj == other => true or false + * obj.equal?(other) => true or false + * obj.eql?(other) => true or false + * + * Equality---At the <code>Object</code> level, <code>==</code> returns + * <code>true</code> only if <i>obj</i> and <i>other</i> are the + * same object. Typically, this method is overridden in descendent + * classes to provide class-specific meaning. + * + * Unlike <code>==</code>, the <code>equal?</code> method should never be + * overridden by subclasses: it is used to determine object identity + * (that is, <code>a.equal?(b)</code> iff <code>a</code> is the same + * object as <code>b</code>). + * + * The <code>eql?</code> method returns <code>true</code> if + <i>obj</i> and <i>anObject</i> have the + * same value. Used by <code>Hash</code> to test members for equality. 
+ * For objects of class <code>Object</code>, <code>eql?</code> is + * synonymous with <code>==</code>. Subclasses normally continue this + * tradition, but there are exceptions. <code>Numeric</code> types, for + * example, perform type conversion across <code>==</code>, but not + * across <code>eql?</code>, so: + * + * 1 == 1.0 #=> true + * 1.eql? 1.0 #=> false + */ + +static VALUE +rb_obj_equal(obj1, obj2) + VALUE obj1, obj2; +{ + if (obj1 == obj2) return Qtrue; + return Qfalse; +} + + +/* + * Document-method: __id__ + * Document-method: object_id + * + * call-seq: + * obj.__id__ => fixnum + * obj.object_id => fixnum + * + * Returns an integer identifier for <i>obj</i>. The same number will + * be returned on all calls to <code>id</code> for a given object, and + * no two active objects will share an id. + * <code>Object#object_id</code> is a different concept from the + * <code>:name</code> notation, which returns the symbol id of + * <code>name</code>. Replaces the deprecated <code>Object#id</code>. + */ + + + +/* + * call-seq: + * obj.hash => fixnum + * + * Generates a <code>Fixnum</code> hash value for this object. This + * function must have the property that <code>a.eql?(b)</code> implies + * <code>a.hash == b.hash</code>. The hash value is used by class + * <code>Hash</code>. Any hash value that exceeds the capacity of a + * <code>Fixnum</code> will be truncated before being used. + */ + +VALUE +rb_obj_id(obj) + VALUE obj; +{ + if (SPECIAL_CONST_P(obj)) { + return LONG2NUM((long)obj); + } + return (VALUE)((long)obj|FIXNUM_FLAG); +} + +VALUE +rb_class_real(cl) + VALUE cl; +{ + while (FL_TEST(cl, FL_SINGLETON) || TYPE(cl) == T_ICLASS) { + cl = RCLASS(cl)->super; + } + return cl; +} + +/* + * call-seq: + * obj.class => class + * + * Returns the class of <i>obj</i>, now preferred over + * <code>Object#type</code>, as an object's type in Ruby is only + * loosely tied to that object's class. 
This method must always be + * called with an explicit receiver, as <code>class</code> is also a + * reserved word in Ruby. + * + * 1.class #=> Fixnum + * self.class #=> Object + */ + +VALUE +rb_obj_class(obj) + VALUE obj; +{ + return rb_class_real(CLASS_OF(obj)); +} + +static void +init_copy(dest, obj) + VALUE dest, obj; +{ + if (OBJ_FROZEN(dest)) { + rb_raise(rb_eTypeError, "[bug] frozen object (%s) allocated", rb_obj_classname(dest)); + } + RBASIC(dest)->flags &= ~(T_MASK|FL_EXIVAR); + RBASIC(dest)->flags |= RBASIC(obj)->flags & (T_MASK|FL_EXIVAR|FL_TAINT); + rb_copy_generic_ivar(dest, obj); + rb_gc_copy_finalizer(dest, obj); + switch (TYPE(obj)) { + case T_OBJECT: + case T_CLASS: + case T_MODULE: + if (ROBJECT(dest)->iv_tbl) { + st_free_table(ROBJECT(dest)->iv_tbl); + ROBJECT(dest)->iv_tbl = 0; + } + if (ROBJECT(obj)->iv_tbl) { + ROBJECT(dest)->iv_tbl = st_copy(ROBJECT(obj)->iv_tbl); + } + } + rb_funcall(dest, id_init_copy, 1, obj); +} + +/* + * call-seq: + * obj.clone -> an_object + * + * Produces a shallow copy of <i>obj</i>---the instance variables of + * <i>obj</i> are copied, but not the objects they reference. Copies + * the frozen and tainted state of <i>obj</i>. See also the discussion + * under <code>Object#dup</code>. + * + * class Klass + * attr_accessor :str + * end + * s1 = Klass.new #=> #<Klass:0x401b3a38> + * s1.str = "Hello" #=> "Hello" + * s2 = s1.clone #=> #<Klass:0x401b3998 @str="Hello"> + * s2.str[1,4] = "i" #=> "i" + * s1.inspect #=> "#<Klass:0x401b3a38 @str=\"Hi\">" + * s2.inspect #=> "#<Klass:0x401b3998 @str=\"Hi\">" + * + * This method may have class-specific behavior. If so, that + * behavior will be documented under the #+initialize_copy+ method of + * the class. 
+ */ + +VALUE +rb_obj_clone(obj) + VALUE obj; +{ + VALUE clone; + + if (rb_special_const_p(obj)) { + rb_raise(rb_eTypeError, "can't clone %s", rb_obj_classname(obj)); + } + clone = rb_obj_alloc(rb_obj_class(obj)); + RBASIC(clone)->klass = rb_singleton_class_clone(obj); + RBASIC(clone)->flags = (RBASIC(obj)->flags | FL_TEST(clone, FL_TAINT)) & ~(FL_FREEZE|FL_FINALIZE); + init_copy(clone, obj); + RBASIC(clone)->flags |= RBASIC(obj)->flags & FL_FREEZE; + + return clone; +} + +/* + * call-seq: + * obj.dup -> an_object + * + * Produces a shallow copy of <i>obj</i>---the instance variables of + * <i>obj</i> are copied, but not the objects they reference. + * <code>dup</code> copies the tainted state of <i>obj</i>. See also + * the discussion under <code>Object#clone</code>. In general, + * <code>clone</code> and <code>dup</code> may have different semantics + * in descendent classes. While <code>clone</code> is used to duplicate + * an object, including its internal state, <code>dup</code> typically + * uses the class of the descendent object to create the new instance. + * + * This method may have class-specific behavior. If so, that + * behavior will be documented under the #+initialize_copy+ method of + * the class. + */ + +VALUE +rb_obj_dup(obj) + VALUE obj; +{ + VALUE dup; + + if (rb_special_const_p(obj)) { + rb_raise(rb_eTypeError, "can't dup %s", rb_obj_classname(obj)); + } + dup = rb_obj_alloc(rb_obj_class(obj)); + init_copy(dup, obj); + + return dup; +} + +/* :nodoc: */ +VALUE +rb_obj_init_copy(obj, orig) + VALUE obj, orig; +{ + if (obj == orig) return obj; + rb_check_frozen(obj); + if (TYPE(obj) != TYPE(orig) || rb_obj_class(obj) != rb_obj_class(orig)) { + rb_raise(rb_eTypeError, "initialize_copy should take same class object"); + } + return obj; +} + +/* + * call-seq: + * obj.to_s => string + * + * Returns a string representing <i>obj</i>. The default + * <code>to_s</code> prints the object's class and an encoding of the + * object id. 
As a special case, the top-level object that is the + * initial execution context of Ruby programs returns ``main.'' + */ + +VALUE +rb_any_to_s(obj) + VALUE obj; +{ + char *cname = rb_obj_classname(obj); + VALUE str; + + str = rb_str_new(0, strlen(cname)+6+16+1); /* 6:tags 16:addr 1:nul */ + sprintf(RSTRING(str)->ptr, "#<%s:0x%lx>", cname, obj); + RSTRING(str)->len = strlen(RSTRING(str)->ptr); + if (OBJ_TAINTED(obj)) OBJ_TAINT(str); + + return str; +} + +VALUE +rb_inspect(obj) + VALUE obj; +{ + return rb_obj_as_string(rb_funcall(obj, id_inspect, 0, 0)); +} + +static int +inspect_i(id, value, str) + ID id; + VALUE value; + VALUE str; +{ + VALUE str2; + char *ivname; + + /* need not to show internal data */ + if (CLASS_OF(value) == 0) return ST_CONTINUE; + if (!rb_is_instance_id(id)) return ST_CONTINUE; + if (RSTRING(str)->ptr[0] == '-') { /* first element */ + RSTRING(str)->ptr[0] = '#'; + rb_str_cat2(str, " "); + } + else { + rb_str_cat2(str, ", "); + } + ivname = rb_id2name(id); + rb_str_cat2(str, ivname); + rb_str_cat2(str, "="); + str2 = rb_inspect(value); + rb_str_append(str, str2); + OBJ_INFECT(str, str2); + + return ST_CONTINUE; +} + +static VALUE +inspect_obj(obj, str, recur) + VALUE obj, str; + int recur; +{ + if (recur) { + rb_str_cat2(str, " ..."); + } + else { + st_foreach_safe(ROBJECT(obj)->iv_tbl, inspect_i, str); + } + rb_str_cat2(str, ">"); + RSTRING(str)->ptr[0] = '#'; + OBJ_INFECT(str, obj); + + return str; +} + +/* + * call-seq: + * obj.inspect => string + * + * Returns a string containing a human-readable representation of + * <i>obj</i>. If not overridden, uses the <code>to_s</code> method to + * generate the string. 
+ * + * [ 1, 2, 3..4, 'five' ].inspect #=> "[1, 2, 3..4, \"five\"]" + * Time.new.inspect #=> "Wed Apr 09 08:54:39 CDT 2003" + */ + + +static VALUE +rb_obj_inspect(obj) + VALUE obj; +{ + if (TYPE(obj) == T_OBJECT + && ROBJECT(obj)->iv_tbl + && ROBJECT(obj)->iv_tbl->num_entries > 0) { + VALUE str; + char *c; + + c = rb_obj_classname(obj); + str = rb_str_new(0, strlen(c)+10+16+1); /* 10:tags 16:addr 1:nul */ + sprintf(RSTRING(str)->ptr, "-<%s:0x%lx", c, obj); + RSTRING(str)->len = strlen(RSTRING(str)->ptr); + return rb_exec_recursive(inspect_obj, obj, str); + } + return rb_funcall(obj, rb_intern("to_s"), 0, 0); +} + + +/* + * call-seq: + * obj.instance_of?(class) => true or false + * + * Returns <code>true</code> if <i>obj</i> is an instance of the given + * class. See also <code>Object#kind_of?</code>. + */ + +VALUE +rb_obj_is_instance_of(obj, c) + VALUE obj, c; +{ + switch (TYPE(c)) { + case T_MODULE: + case T_CLASS: + case T_ICLASS: + break; + default: + rb_raise(rb_eTypeError, "class or module required"); + } + + if (rb_obj_class(obj) == c) return Qtrue; + return Qfalse; +} + + +/* + * call-seq: + * obj.is_a?(class) => true or false + * obj.kind_of?(class) => true or false + * + * Returns <code>true</code> if <i>class</i> is the class of + * <i>obj</i>, or if <i>class</i> is one of the superclasses of + * <i>obj</i> or modules included in <i>obj</i>. + * + * module M; end + * class A + * include M + * end + * class B < A; end + * class C < B; end + * b = B.new + * b.instance_of? A #=> false + * b.instance_of? B #=> true + * b.instance_of? C #=> false + * b.instance_of? M #=> false + * b.kind_of? A #=> true + * b.kind_of? B #=> true + * b.kind_of? C #=> false + * b.kind_of? 
M #=> true + */ + +VALUE +rb_obj_is_kind_of(obj, c) + VALUE obj, c; +{ + VALUE cl = CLASS_OF(obj); + + switch (TYPE(c)) { + case T_MODULE: + case T_CLASS: + case T_ICLASS: + break; + + default: + rb_raise(rb_eTypeError, "class or module required"); + } + + while (cl) { + if (cl == c || RCLASS(cl)->m_tbl == RCLASS(c)->m_tbl) + return Qtrue; + cl = RCLASS(cl)->super; + } + return Qfalse; +} + + +/* + * Document-method: singleton_method_added + * + * call-seq: + * singleton_method_added(symbol) + * + * Invoked as a callback whenever a singleton method is added to the + * receiver. + * + * module Chatty + * def Chatty.singleton_method_added(id) + * puts "Adding #{id.id2name}" + * end + * def self.one() end + * def two() end + * def Chatty.three() end + * end + * + * <em>produces:</em> + * + * Adding singleton_method_added + * Adding one + * Adding three + * + */ + +/* + * Document-method: singleton_method_removed + * + * call-seq: + * singleton_method_removed(symbol) + * + * Invoked as a callback whenever a singleton method is removed from + * the receiver. + * + * module Chatty + * def Chatty.singleton_method_removed(id) + * puts "Removing #{id.id2name}" + * end + * def self.one() end + * def two() end + * def Chatty.three() end + * class <<self + * remove_method :three + * remove_method :one + * end + * end + * + * <em>produces:</em> + * + * Removing three + * Removing one + */ + +/* + * Document-method: singleton_method_undefined + * + * call-seq: + * singleton_method_undefined(symbol) + * + * Invoked as a callback whenever a singleton method is undefined in + * the receiver. 
+ * + * module Chatty + * def Chatty.singleton_method_undefined(id) + * puts "Undefining #{id.id2name}" + * end + * def Chatty.one() end + * class << self + * undef_method(:one) + * end + * end + * + * <em>produces:</em> + * + * Undefining one + */ + + +/* + * Document-method: included + * + * call-seq: + * included( othermod ) + * + * Callback invoked whenever the receiver is included in another + * module or class. This should be used in preference to + * <tt>Module.append_features</tt> if your code wants to perform some + * action when a module is included in another. + * + * module A + * def A.included(mod) + * puts "#{self} included in #{mod}" + * end + * end + * module Enumerable + * include A + * end + */ + + +/* + * Not documented + */ + +static VALUE +rb_obj_dummy() +{ + return Qnil; +} + + +/* + * call-seq: + * obj.tainted? => true or false + * + * Returns <code>true</code> if the object is tainted. + */ + +VALUE +rb_obj_tainted(obj) + VALUE obj; +{ + if (OBJ_TAINTED(obj)) + return Qtrue; + return Qfalse; +} + +/* + * call-seq: + * obj.taint -> obj + * + * Marks <i>obj</i> as tainted---if the <code>$SAFE</code> level is + * set appropriately, many method calls which might alter the running + * programs environment will refuse to accept tainted strings. + */ + +VALUE +rb_obj_taint(obj) + VALUE obj; +{ + rb_secure(4); + if (!OBJ_TAINTED(obj)) { + if (OBJ_FROZEN(obj)) { + rb_error_frozen("object"); + } + OBJ_TAINT(obj); + } + return obj; +} + + +/* + * call-seq: + * obj.untaint => obj + * + * Removes the taint from <i>obj</i>. + */ + +VALUE +rb_obj_untaint(obj) + VALUE obj; +{ + rb_secure(3); + if (OBJ_TAINTED(obj)) { + if (OBJ_FROZEN(obj)) { + rb_error_frozen("object"); + } + FL_UNSET(obj, FL_TAINT); + } + return obj; +} + +void +rb_obj_infect(obj1, obj2) + VALUE obj1, obj2; +{ + OBJ_INFECT(obj1, obj2); +} + + +/* + * call-seq: + * obj.freeze => obj + * + * Prevents further modifications to <i>obj</i>. 
A + * <code>TypeError</code> will be raised if modification is attempted. + * There is no way to unfreeze a frozen object. See also + * <code>Object#frozen?</code>. + * + * a = [ "a", "b", "c" ] + * a.freeze + * a << "z" + * + * <em>produces:</em> + * + * prog.rb:3:in `<<': can't modify frozen array (TypeError) + * from prog.rb:3 + */ + +VALUE +rb_obj_freeze(obj) + VALUE obj; +{ + if (!OBJ_FROZEN(obj)) { + if (rb_safe_level() >= 4 && !OBJ_TAINTED(obj)) { + rb_raise(rb_eSecurityError, "Insecure: can't freeze object"); + } + OBJ_FREEZE(obj); + } + return obj; +} + +/* + * call-seq: + * obj.frozen? => true or false + * + * Returns the freeze status of <i>obj</i>. + * + * a = [ "a", "b", "c" ] + * a.freeze #=> ["a", "b", "c"] + * a.frozen? #=> true + */ + +static VALUE +rb_obj_frozen_p(obj) + VALUE obj; +{ + if (OBJ_FROZEN(obj)) return Qtrue; + return Qfalse; +} + + +/* + * Document-class: NilClass + * + * The class of the singleton object <code>nil</code>. + */ + +/* + * call-seq: + * nil.to_i => 0 + * + * Always returns zero. + * + * nil.to_i #=> 0 + */ + + +static VALUE +nil_to_i(obj) + VALUE obj; +{ + return INT2FIX(0); +} + +/* + * call-seq: + * nil.to_f => 0.0 + * + * Always returns zero. + * + * nil.to_f #=> 0.0 + */ + +static VALUE +nil_to_f(obj) + VALUE obj; +{ + return rb_float_new(0.0); +} + +/* + * call-seq: + * nil.to_s => "" + * + * Always returns the empty string. + * + * nil.to_s #=> "" + */ + +static VALUE +nil_to_s(obj) + VALUE obj; +{ + return rb_str_new2(""); +} + +/* + * call-seq: + * nil.to_a => [] + * + * Always returns an empty array. + * + * nil.to_a #=> [] + */ + +static VALUE +nil_to_a(obj) + VALUE obj; +{ + return rb_ary_new2(0); +} + +/* + * call-seq: + * nil.inspect => "nil" + * + * Always returns the string "nil". 
+ */ + +static VALUE +nil_inspect(obj) + VALUE obj; +{ + return rb_str_new2("nil"); +} + +#ifdef NIL_PLUS +static VALUE +nil_plus(x, y) + VALUE x, y; +{ + switch (TYPE(y)) { + case T_NIL: + case T_FIXNUM: + case T_FLOAT: + case T_BIGNUM: + case T_STRING: + case T_ARRAY: + return y; + default: + rb_raise(rb_eTypeError, "tried to add %s(%s) to nil", + RSTRING(rb_inspect(y))->ptr, + rb_obj_classname(y)); + } + /* not reached */ +} +#endif + +static VALUE +main_to_s(obj) + VALUE obj; +{ + return rb_str_new2("main"); +} + + +/*********************************************************************** + * Document-class: TrueClass + * + * The global value <code>true</code> is the only instance of class + * <code>TrueClass</code> and represents a logically true value in + * boolean expressions. The class provides operators allowing + * <code>true</code> to be used in logical expressions. + */ + + +/* + * call-seq: + * true.to_s => "true" + * + * The string representation of <code>true</code> is "true". + */ + +static VALUE +true_to_s(obj) + VALUE obj; +{ + return rb_str_new2("true"); +} + + +/* + * call-seq: + * true & obj => true or false + * + * And---Returns <code>false</code> if <i>obj</i> is + * <code>nil</code> or <code>false</code>, <code>true</code> otherwise. + */ + +static VALUE +true_and(obj, obj2) + VALUE obj, obj2; +{ + return RTEST(obj2)?Qtrue:Qfalse; +} + +/* + * call-seq: + * true | obj => true + * + * Or---Returns <code>true</code>. As <i>anObject</i> is an argument to + * a method call, it is always evaluated; there is no short-circuit + * evaluation in this case. + * + * true | puts("or") + * true || puts("logical or") + * + * <em>produces:</em> + * + * or + */ + +static VALUE +true_or(obj, obj2) + VALUE obj, obj2; +{ + return Qtrue; +} + + +/* + * call-seq: + * true ^ obj => !obj + * + * Exclusive Or---Returns <code>true</code> if <i>obj</i> is + * <code>nil</code> or <code>false</code>, <code>false</code> + * otherwise. 
+ */ + +static VALUE +true_xor(obj, obj2) + VALUE obj, obj2; +{ + return RTEST(obj2)?Qfalse:Qtrue; +} + + +/* + * Document-class: FalseClass + * + * The global value <code>false</code> is the only instance of class + * <code>FalseClass</code> and represents a logically false value in + * boolean expressions. The class provides operators allowing + * <code>false</code> to participate correctly in logical expressions. + * + */ + +/* + * call-seq: + * false.to_s => "false" + * + * 'nuf said... + */ + +static VALUE +false_to_s(obj) + VALUE obj; +{ + return rb_str_new2("false"); +} + +/* + * call-seq: + * false & obj => false + * nil & obj => false + * + * And---Returns <code>false</code>. <i>obj</i> is always + * evaluated as it is the argument to a method call---there is no + * short-circuit evaluation in this case. + */ + +static VALUE +false_and(obj, obj2) + VALUE obj, obj2; +{ + return Qfalse; +} + + +/* + * call-seq: + * false | obj => true or false + * nil | obj => true or false + * + * Or---Returns <code>false</code> if <i>obj</i> is + * <code>nil</code> or <code>false</code>; <code>true</code> otherwise. + */ + +static VALUE +false_or(obj, obj2) + VALUE obj, obj2; +{ + return RTEST(obj2)?Qtrue:Qfalse; +} + + + +/* + * call-seq: + * false ^ obj => true or false + * nil ^ obj => true or false + * + * Exclusive Or---If <i>obj</i> is <code>nil</code> or + * <code>false</code>, returns <code>false</code>; otherwise, returns + * <code>true</code>. + * + */ + +static VALUE +false_xor(obj, obj2) + VALUE obj, obj2; +{ + return RTEST(obj2)?Qtrue:Qfalse; +} + +/* + * call_seq: + * nil.nil? => true + * + * Only the object <i>nil</i> responds <code>true</code> to <code>nil?</code>. + */ + +static VALUE +rb_true(obj) + VALUE obj; +{ + return Qtrue; +} + +/* + * call_seq: + * nil.nil? => true + * <anything_else>.nil? => false + * + * Only the object <i>nil</i> responds <code>true</code> to <code>nil?</code>. 
+ */ + + +static VALUE +rb_false(obj) + VALUE obj; +{ + return Qfalse; +} + + +/* + * call-seq: + * obj =~ other => false + * + * Pattern Match---Overridden by descendents (notably + * <code>Regexp</code> and <code>String</code>) to provide meaningful + * pattern-match semantics. + */ + +static VALUE +rb_obj_pattern_match(obj1, obj2) + VALUE obj1, obj2; +{ + return Qfalse; +} + +/********************************************************************** + * Document-class: Symbol + * + * <code>Symbol</code> objects represent names and some strings + * inside the Ruby + * interpreter. They are generated using the <code>:name</code> and + * <code>:"string"</code> literals + * syntax, and by the various <code>to_sym</code> methods. The same + * <code>Symbol</code> object will be created for a given name or string + * for the duration of a program's execution, regardless of the context + * or meaning of that name. Thus if <code>Fred</code> is a constant in + * one context, a method in another, and a class in a third, the + * <code>Symbol</code> <code>:Fred</code> will be the same object in + * all three contexts. + * + * module One + * class Fred + * end + * $f1 = :Fred + * end + * module Two + * Fred = 1 + * $f2 = :Fred + * end + * def Fred() + * end + * $f3 = :Fred + * $f1.id #=> 2514190 + * $f2.id #=> 2514190 + * $f3.id #=> 2514190 + * + */ + +/* + * call-seq: + * sym.to_i => fixnum + * + * Returns an integer that is unique for each symbol within a + * particular execution of a program. + * + * :fred.to_i #=> 9809 + * "fred".to_sym.to_i #=> 9809 + */ + +static VALUE +sym_to_i(sym) + VALUE sym; +{ + ID id = SYM2ID(sym); + + return LONG2FIX(id); +} + + +/* + * call-seq: + * sym.inspect => string + * + * Returns the representation of <i>sym</i> as a symbol literal. 
+ * + * :fred.inspect #=> ":fred" + */ + +static VALUE +sym_inspect(sym) + VALUE sym; +{ + VALUE str; + char *name; + ID id = SYM2ID(sym); + + name = rb_id2name(id); + str = rb_str_new(0, strlen(name)+1); + RSTRING(str)->ptr[0] = ':'; + strcpy(RSTRING(str)->ptr+1, name); + if (rb_is_junk_id(id)) { + str = rb_str_dump(str); + strncpy(RSTRING(str)->ptr, ":\"", 2); + } + return str; +} + + +/* + * call-seq: + * sym.id2name => string + * sym.to_s => string + * + * Returns the name or string corresponding to <i>sym</i>. + * + * :fred.id2name #=> "fred" + */ + + +static VALUE +sym_to_s(sym) + VALUE sym; +{ + return rb_str_new2(rb_id2name(SYM2ID(sym))); +} + + +/* + * call-seq: + * sym.to_sym => sym + * + * In general, <code>to_sym</code> returns the <code>Symbol</code> corresponding + * to an object. As <i>sym</i> is already a symbol, <code>self</code> is returned + * in this case. + */ + +static VALUE +sym_to_sym(sym) + VALUE sym; +{ + return sym; +} + + +/*********************************************************************** + * + * Document-class: Module + * + * A <code>Module</code> is a collection of methods and constants. The + * methods in a module may be instance methods or module methods. + * Instance methods appear as methods in a class when the module is + * included, module methods do not. Conversely, module methods may be + * called without creating an encapsulating object, while instance + * methods may not. (See <code>Module#module_function</code>) + * + * In the descriptions that follow, the parameter <i>syml</i> refers + * to a symbol, which is either a quoted string or a + * <code>Symbol</code> (such as <code>:name</code>). + * + * module Mod + * include Math + * CONST = 1 + * def meth + * # ... + * end + * end + * Mod.class #=> Module + * Mod.constants #=> ["E", "PI", "CONST"] + * Mod.instance_methods #=> ["meth"] + * + */ + +/* + * call-seq: + * mod.to_s => string + * + * Return a string representing this module or class. 
For basic + * classes and modules, this is the name. For singletons, we + * show information on the thing we're attached to as well. + */ + +static VALUE +rb_mod_to_s(klass) + VALUE klass; + +{ + if (FL_TEST(klass, FL_SINGLETON)) { + VALUE s = rb_str_new2("#<"); + VALUE v = rb_iv_get(klass, "__attached__"); + + rb_str_cat2(s, "Class:"); + switch (TYPE(v)) { + case T_CLASS: case T_MODULE: + rb_str_append(s, rb_inspect(v)); + break; + default: + rb_str_append(s, rb_any_to_s(v)); + break; + } + rb_str_cat2(s, ">"); + + return s; + } + return rb_str_dup(rb_class_name(klass)); +} + +/* + * call-seq: + * mod.freeze + * + * Prevents further modifications to <i>mod</i>. + */ + +static VALUE +rb_mod_freeze(mod) + VALUE mod; +{ + rb_mod_to_s(mod); + return rb_obj_freeze(mod); +} + +/* + * call-seq: + * mod === obj => true or false + * + * Case Equality---Returns <code>true</code> if <i>anObject</i> is an + * instance of <i>mod</i> or one of <i>mod</i>'s descendents. Of + * limited use for modules, but can be used in <code>case</code> + * statements to classify objects by class. + */ + +static VALUE +rb_mod_eqq(mod, arg) + VALUE mod, arg; +{ + return rb_obj_is_kind_of(arg, mod); +} + +/* + * call-seq: + * mod <= other => true, false, or nil + * + * Returns true if <i>mod</i> is a subclass of <i>other</i> or + * is the same as <i>other</i>. Returns + * <code>nil</code> if there's no relationship between the two. + * (Think of the relationship in terms of the class definition: + * "class A<B" implies "A<B"). 
+ * + */ + +VALUE +rb_class_inherited_p(mod, arg) + VALUE mod, arg; +{ + VALUE start = mod; + + if (mod == arg) return Qtrue; + switch (TYPE(arg)) { + case T_MODULE: + case T_CLASS: + break; + default: + rb_raise(rb_eTypeError, "compared with non class/module"); + } + while (mod) { + if (RCLASS(mod)->m_tbl == RCLASS(arg)->m_tbl) + return Qtrue; + mod = RCLASS(mod)->super; + } + /* not mod < arg; check if mod > arg */ + while (arg) { + if (RCLASS(arg)->m_tbl == RCLASS(start)->m_tbl) + return Qfalse; + arg = RCLASS(arg)->super; + } + return Qnil; +} + +/* + * call-seq: + * mod < other => true, false, or nil + * + * Returns true if <i>mod</i> is a subclass of <i>other</i>. Returns + * <code>nil</code> if there's no relationship between the two. + * (Think of the relationship in terms of the class definition: + * "class A<B" implies "A<B"). + * + */ + +static VALUE +rb_mod_lt(mod, arg) + VALUE mod, arg; +{ + if (mod == arg) return Qfalse; + return rb_class_inherited_p(mod, arg); +} + + +/* + * call-seq: + * mod >= other => true, false, or nil + * + * Returns true if <i>mod</i> is an ancestor of <i>other</i>, or the + * two modules are the same. Returns + * <code>nil</code> if there's no relationship between the two. + * (Think of the relationship in terms of the class definition: + * "class A<B" implies "B>A"). + * + */ + +static VALUE +rb_mod_ge(mod, arg) + VALUE mod, arg; +{ + switch (TYPE(arg)) { + case T_MODULE: + case T_CLASS: + break; + default: + rb_raise(rb_eTypeError, "compared with non class/module"); + } + + return rb_class_inherited_p(arg, mod); +} + +/* + * call-seq: + * mod > other => true, false, or nil + * + * Returns true if <i>mod</i> is an ancestor of <i>other</i>. Returns + * <code>nil</code> if there's no relationship between the two. + * (Think of the relationship in terms of the class definition: + * "class A<B" implies "B>A"). 
+ * + */ + +static VALUE +rb_mod_gt(mod, arg) + VALUE mod, arg; +{ + if (mod == arg) return Qfalse; + return rb_mod_ge(mod, arg); +} + +/* + * call-seq: + * mod <=> other_mod => -1, 0, +1, or nil + * + * Comparison---Returns -1 if <i>mod</i> includes <i>other_mod</i>, 0 if + * <i>mod</i> is the same as <i>other_mod</i>, and +1 if <i>mod</i> is + * included by <i>other_mod</i> or if <i>mod</i> has no relationship with + * <i>other_mod</i>. Returns <code>nil</code> if <i>other_mod</i> is + * not a module. + */ + +static VALUE +rb_mod_cmp(mod, arg) + VALUE mod, arg; +{ + VALUE cmp; + + if (mod == arg) return INT2FIX(0); + switch (TYPE(arg)) { + case T_MODULE: + case T_CLASS: + break; + default: + return Qnil; + } + + cmp = rb_class_inherited_p(mod, arg); + if (NIL_P(cmp)) return Qnil; + if (cmp) { + return INT2FIX(-1); + } + return INT2FIX(1); +} + +static VALUE rb_module_s_alloc _((VALUE)); +static VALUE +rb_module_s_alloc(klass) + VALUE klass; +{ + VALUE mod = rb_module_new(); + + RBASIC(mod)->klass = klass; + return mod; +} + +static VALUE rb_class_s_alloc _((VALUE)); +static VALUE +rb_class_s_alloc(klass) + VALUE klass; +{ + return rb_class_boot(0); +} + +/* + * call-seq: + * Module.new => mod + * Module.new {|mod| block } => mod + * + * Creates a new anonymous module. If a block is given, it is passed + * the module object, and the block is evaluated in the context of this + * module using <code>module_eval</code>. + * + * Fred = Module.new do + * def meth1 + * "hello" + * end + * def meth2 + * "bye" + * end + * end + * a = "my string" + * a.extend(Fred) #=> "my string" + * a.meth1 #=> "hello" + * a.meth2 #=> "bye" + */ + +static VALUE +rb_mod_initialize(module) + VALUE module; +{ + if (rb_block_given_p()) { + rb_mod_module_eval(0, 0, module); + } + return Qnil; +} + +/* + * call-seq: + * Class.new(super_class=Object) => a_class + * + * Creates a new anonymous (unnamed) class with the given superclass + * (or <code>Object</code> if no parameter is given). 
You can give a + * class a name by assigning the class object to a constant. + * + */ + +static VALUE +rb_class_initialize(argc, argv, klass) + int argc; + VALUE *argv; + VALUE klass; +{ + VALUE super; + + if (RCLASS(klass)->super != 0) { + rb_raise(rb_eTypeError, "already initialized class"); + } + if (rb_scan_args(argc, argv, "01", &super) == 0) { + super = rb_cObject; + } + else { + rb_check_inheritable(super); + } + RCLASS(klass)->super = super; + rb_make_metaclass(klass, RBASIC(super)->klass); + rb_class_inherited(super, klass); + rb_mod_initialize(klass); + + return klass; +} + +/* + * call-seq: + * class.allocate() => obj + * + * Allocates space for a new object of <i>class</i>'s class. The + * returned object must be an instance of <i>class</i>. + * + */ + +VALUE +rb_obj_alloc(klass) + VALUE klass; +{ + VALUE obj; + + if (RCLASS(klass)->super == 0) { + rb_raise(rb_eTypeError, "can't instantiate uninitialized class"); + } + if (FL_TEST(klass, FL_SINGLETON)) { + rb_raise(rb_eTypeError, "can't create instance of singleton class"); + } + obj = rb_funcall(klass, ID_ALLOCATOR, 0, 0); + if (rb_obj_class(obj) != rb_class_real(klass)) { + rb_raise(rb_eTypeError, "wrong instance allocation"); + } + return obj; +} + +static VALUE rb_class_allocate_instance _((VALUE)); +static VALUE +rb_class_allocate_instance(klass) + VALUE klass; +{ + NEWOBJ(obj, struct RObject); + OBJSETUP(obj, klass, T_OBJECT); + return (VALUE)obj; +} + +/* + * call-seq: + * class.new(args, ...) => obj + * + * Calls <code>allocate</code> to create a new object of + * <i>class</i>'s class, then invokes that object's + * <code>initialize</code> method, passing it <i>args</i>. + * This is the method that ends up getting called whenever + * an object is constructed using .new. 
+ * + */ + +VALUE +rb_class_new_instance(argc, argv, klass) + int argc; + VALUE *argv; + VALUE klass; +{ + VALUE obj; + + obj = rb_obj_alloc(klass); + rb_obj_call_init(obj, argc, argv); + + return obj; +} + +/* + * call-seq: + * class.superclass -> a_super_class or nil + * + * Returns the superclass of <i>class</i>, or <code>nil</code>. + * + * File.superclass #=> IO + * IO.superclass #=> Object + * Object.superclass #=> nil + * + */ + +static VALUE +rb_class_superclass(klass) + VALUE klass; +{ + VALUE super = RCLASS(klass)->super; + + if (!super) { + rb_raise(rb_eTypeError, "uninitialized class"); + } + while (TYPE(super) == T_ICLASS) { + super = RCLASS(super)->super; + } + if (!super) { + return Qnil; + } + return super; +} + +static ID +str_to_id(str) + VALUE str; +{ + if (!RSTRING(str)->ptr || RSTRING(str)->len == 0) { + rb_raise(rb_eArgError, "empty symbol string"); + } + if (RSTRING(str)->len != strlen(RSTRING(str)->ptr)) { + rb_raise(rb_eArgError, "Symbols should not contain NUL (\\0)"); + } + return rb_intern(RSTRING(str)->ptr); +} + +ID +rb_to_id(name) + VALUE name; +{ + VALUE tmp; + ID id; + + switch (TYPE(name)) { + case T_STRING: + return str_to_id(name); + case T_FIXNUM: + rb_warn("do not use Fixnums as Symbols"); + id = FIX2LONG(name); + if (!rb_id2name(id)) { + rb_raise(rb_eArgError, "%ld is not a symbol", id); + } + break; + case T_SYMBOL: + id = SYM2ID(name); + break; + default: + tmp = rb_check_string_type(name); + if (!NIL_P(tmp)) { + return str_to_id(tmp); + } + rb_raise(rb_eTypeError, "%s is not a symbol", RSTRING(rb_inspect(name))->ptr); + } + return id; +} + +/* + * call-seq: + * attr(symbol, writable=false) => nil + * + * Defines a named attribute for this module, where the name is + * <i>symbol.</i><code>id2name</code>, creating an instance variable + * (<code>@name</code>) and a corresponding access method to read it. 
+ * If the optional <i>writable</i> argument is <code>true</code>, also + * creates a method called <code>name=</code> to set the attribute. + * + * module Mod + * attr :size, true + * end + * + * <em>is equivalent to:</em> + * + * module Mod + * def size + * @size + * end + * def size=(val) + * @size = val + * end + * end + */ + +static VALUE +rb_mod_attr(argc, argv, klass) + int argc; + VALUE *argv; + VALUE klass; +{ + VALUE name, pub; + + rb_scan_args(argc, argv, "11", &name, &pub); + rb_attr(klass, rb_to_id(name), 1, RTEST(pub), Qtrue); + return Qnil; +} + +/* + * call-seq: + * attr_reader(symbol, ...) => nil + * + * Creates instance variables and corresponding methods that return the + * value of each instance variable. Equivalent to calling + * ``<code>attr</code><i>:name</i>'' on each name in turn. + */ + +static VALUE +rb_mod_attr_reader(argc, argv, klass) + int argc; + VALUE *argv; + VALUE klass; +{ + int i; + + for (i=0; i<argc; i++) { + rb_attr(klass, rb_to_id(argv[i]), 1, 0, Qtrue); + } + return Qnil; +} + +/* + * call-seq: + * attr_writer(symbol, ...) => nil + * + * Creates an accessor method to allow assignment to the attribute + * <i>aSymbol</i><code>.id2name</code>. + */ + +static VALUE +rb_mod_attr_writer(argc, argv, klass) + int argc; + VALUE *argv; + VALUE klass; +{ + int i; + + for (i=0; i<argc; i++) { + rb_attr(klass, rb_to_id(argv[i]), 0, 1, Qtrue); + } + return Qnil; +} + +/* + * call-seq: + * attr_accessor(symbol, ...) => nil + * + * Equivalent to calling ``<code>attr</code><i>symbol</i><code>, + * true</code>'' on each <i>symbol</i> in turn. 
+ * + * module Mod + * attr_accessor(:one, :two) + * end + * Mod.instance_methods.sort #=> ["one", "one=", "two", "two="] + */ + +static VALUE +rb_mod_attr_accessor(argc, argv, klass) + int argc; + VALUE *argv; + VALUE klass; +{ + int i; + + for (i=0; i<argc; i++) { + rb_attr(klass, rb_to_id(argv[i]), 1, 1, Qtrue); + } + return Qnil; +} + +/* + * call-seq: + * mod.const_get(sym) => obj + * + * Returns the value of the named constant in <i>mod</i>. + * + * Math.const_get(:PI) #=> 3.14159265358979 + */ + +static VALUE +rb_mod_const_get(mod, name) + VALUE mod, name; +{ + ID id = rb_to_id(name); + + if (!rb_is_const_id(id)) { + rb_name_error(id, "wrong constant name %s", rb_id2name(id)); + } + return rb_const_get(mod, id); +} + +/* + * call-seq: + * mod.const_set(sym, obj) => obj + * + * Sets the named constant to the given object, returning that object. + * Creates a new constant if no constant with the given name previously + * existed. + * + * Math.const_set("HIGH_SCHOOL_PI", 22.0/7.0) #=> 3.14285714285714 + * Math::HIGH_SCHOOL_PI - Math::PI #=> 0.00126448926734968 + */ + +static VALUE +rb_mod_const_set(mod, name, value) + VALUE mod, name, value; +{ + ID id = rb_to_id(name); + + if (!rb_is_const_id(id)) { + rb_name_error(id, "wrong constant name %s", rb_id2name(id)); + } + rb_const_set(mod, id, value); + return value; +} + +/* + * call-seq: + * mod.const_defined?(sym) => true or false + * + * Returns <code>true</code> if a constant with the given name is + * defined by <i>mod</i>. + * + * Math.const_defined? "PI" #=> true + */ + +static VALUE +rb_mod_const_defined(mod, name) + VALUE mod, name; +{ + ID id = rb_to_id(name); + + if (!rb_is_const_id(id)) { + rb_name_error(id, "wrong constant name %s", rb_id2name(id)); + } + return rb_const_defined_at(mod, id); +} + +/* + * call-seq: + * obj.methods => array + * + * Returns a list of the names of methods publicly accessible in + * <i>obj</i>. This will include all the methods accessible in + * <i>obj</i>'s ancestors. 
+ * + * class Klass + * def kMethod() + * end + * end + * k = Klass.new + * k.methods[0..9] #=> ["kMethod", "freeze", "nil?", "is_a?", + * "class", "instance_variable_set", + * "methods", "extend", "__send__", "instance_eval"] + * k.methods.length #=> 42 + */ + + +static VALUE +rb_obj_methods(argc, argv, obj) + int argc; + VALUE *argv; + VALUE obj; +{ + retry: + if (argc == 0) { + VALUE args[1]; + + args[0] = Qtrue; + return rb_class_instance_methods(1, args, CLASS_OF(obj)); + } + else { + VALUE recur; + + rb_scan_args(argc, argv, "1", &recur); + if (RTEST(recur)) { + argc = 0; + goto retry; + } + return rb_obj_singleton_methods(argc, argv, obj); + } +} + +/* + * call-seq: + * obj.protected_methods(all=true) => array + * + * Returns the list of protected methods accessible to <i>obj</i>. If + * the <i>all</i> parameter is set to <code>false</code>, only those methods + * in the receiver will be listed. + */ + +static VALUE +rb_obj_protected_methods(argc, argv, obj) + int argc; + VALUE *argv; + VALUE obj; +{ + if (argc == 0) { /* hack to stop warning */ + VALUE args[1]; + + args[0] = Qtrue; + return rb_class_protected_instance_methods(1, args, CLASS_OF(obj)); + } + return rb_class_protected_instance_methods(argc, argv, CLASS_OF(obj)); +} + +/* + * call-seq: + * obj.private_methods(all=true) => array + * + * Returns the list of private methods accessible to <i>obj</i>. If + * the <i>all</i> parameter is set to <code>false</code>, only those methods + * in the receiver will be listed. + */ + +static VALUE +rb_obj_private_methods(argc, argv, obj) + int argc; + VALUE *argv; + VALUE obj; +{ + if (argc == 0) { /* hack to stop warning */ + VALUE args[1]; + + args[0] = Qtrue; + return rb_class_private_instance_methods(1, args, CLASS_OF(obj)); + } + return rb_class_private_instance_methods(argc, argv, CLASS_OF(obj)); +} + +/* + * call-seq: + * obj.public_methods(all=true) => array + * + * Returns the list of public methods accessible to <i>obj</i>. 
If + * the <i>all</i> parameter is set to <code>false</code>, only those methods + * in the receiver will be listed. + */ + +static VALUE +rb_obj_public_methods(argc, argv, obj) + int argc; + VALUE *argv; + VALUE obj; +{ + if (argc == 0) { /* hack to stop warning */ + VALUE args[1]; + + args[0] = Qtrue; + return rb_class_public_instance_methods(1, args, CLASS_OF(obj)); + } + return rb_class_public_instance_methods(argc, argv, CLASS_OF(obj)); +} + +/* + * call-seq: + * obj.instance_variable_get(symbol) => obj + * + * Returns the value of the given instance variable (or throws a + * <code>NameError</code> exception). The <code>@</code> part of the + * variable name should be included for regular instance variables + * + * class Fred + * def initialize(p1, p2) + * @a, @b = p1, p2 + * end + * end + * fred = Fred.new('cat', 99) + * fred.instance_variable_get(:@a) #=> "cat" + * fred.instance_variable_get("@b") #=> 99 + */ + +static VALUE +rb_obj_ivar_get(obj, iv) + VALUE obj, iv; +{ + ID id = rb_to_id(iv); + + if (!rb_is_instance_id(id)) { + rb_name_error(id, "`%s' is not allowed as an instance variable name", rb_id2name(id)); + } + return rb_ivar_get(obj, id); +} + + +/* + * call-seq: + * obj.instance_variable_set(symbol, obj) => obj + * + * Sets the instance variable names by <i>symbol</i> to + * <i>object</i>, thereby frustrating the efforts of the class's + * author to attempt to provide proper encapsulation. The variable + * did not have to exist prior to this call. 
+ * + * class Fred + * def initialize(p1, p2) + * @a, @b = p1, p2 + * end + * end + * fred = Fred.new('cat', 99) + * fred.instance_variable_set(:@a, 'dog') #=> "dog" + * fred.instance_variable_set(:@c, 'cat') #=> "cat" + * fred.inspect #=> "#<Fred:0x401b3da8 @a=\"dog\", @b=99, @c=\"cat\">" + */ + +static VALUE +rb_obj_ivar_set(obj, iv, val) + VALUE obj, iv, val; +{ + ID id = rb_to_id(iv); + + if (!rb_is_instance_id(id)) { + rb_name_error(id, "`%s' is not allowed as an instance variable name", rb_id2name(id)); + } + return rb_ivar_set(obj, id, val); +} + +/* + * call-seq: + * mod.class_variable_get(symbol) => obj + * + * Returns the value of the given class variable (or throws a + * <code>NameError</code> exception). The <code>@@</code> part of the + * variable name should be included for regular class variables + * + * class Fred + * @@foo = 99 + * end + * Fred.class_variable_get(:@foo) #=> 99 + */ + +static VALUE +rb_mod_cvar_get(obj, iv) + VALUE obj, iv; +{ + ID id = rb_to_id(iv); + + if (!rb_is_class_id(id)) { + rb_name_error(id, "`%s' is not allowed as an class variable name", rb_id2name(id)); + } + return rb_cvar_get(obj, id); +} + + +/* + * call-seq: + * obj.class_variable_set(symbol, obj) => obj + * + * Sets the class variable names by <i>symbol</i> to + * <i>object</i>. + * + * class Fred + * @@foo = 99 + * def foo + * @@foo + * end + * end + * Fred.class_variable_set(:@foo, 101) #=> 101 + * Fred.new.foo #=> 101 + */ + +static VALUE +rb_mod_cvar_set(obj, iv, val) + VALUE obj, iv, val; +{ + ID id = rb_to_id(iv); + + if (!rb_is_class_id(id)) { + rb_name_error(id, "`%s' is not allowed as an class variable name", rb_id2name(id)); + } + rb_cvar_set(obj, id, val, Qfalse); + return val; +} + +static VALUE +convert_type(val, tname, method, raise) + VALUE val; + const char *tname, *method; + int raise; +{ + ID m; + + m = rb_intern(method); + if (!rb_respond_to(val, m)) { + if (raise) { + rb_raise(rb_eTypeError, "can't convert %s into %s", + NIL_P(val) ? 
"nil" : + val == Qtrue ? "true" : + val == Qfalse ? "false" : + rb_obj_classname(val), + tname); + } + else { + return Qnil; + } + } + return rb_funcall(val, m, 0); +} + +VALUE +rb_convert_type(val, type, tname, method) + VALUE val; + int type; + const char *tname, *method; +{ + VALUE v; + + if (TYPE(val) == type) return val; + v = convert_type(val, tname, method, Qtrue); + if (TYPE(v) != type) { + char *cname = rb_obj_classname(val); + rb_raise(rb_eTypeError, "can't convert %s to %s (%s#%s gives %s)", + cname, tname, cname, method, rb_obj_classname(v)); + } + return v; +} + +VALUE +rb_check_convert_type(val, type, tname, method) + VALUE val; + int type; + const char *tname, *method; +{ + VALUE v; + + /* always convert T_DATA */ + if (TYPE(val) == type && type != T_DATA) return val; + v = convert_type(val, tname, method, Qfalse); + if (NIL_P(v)) return Qnil; + if (TYPE(v) != type) { + char *cname = rb_obj_classname(val); + rb_raise(rb_eTypeError, "can't convert %s to %s (%s#%s gives %s)", + cname, tname, cname, method, rb_obj_classname(v)); + } + return v; +} + + +static VALUE +rb_to_integer(val, method) + VALUE val; + char *method; +{ + VALUE v = convert_type(val, "Integer", method, Qtrue); + if (!rb_obj_is_kind_of(v, rb_cInteger)) { + char *cname = rb_obj_classname(val); + rb_raise(rb_eTypeError, "can't convert %s to Integer (%s#%s gives %s)", + cname, cname, method, rb_obj_classname(v)); + } + return v; +} + +VALUE +rb_to_int(val) + VALUE val; +{ + return rb_to_integer(val, "to_int"); +} + +VALUE +rb_Integer(val) + VALUE val; +{ + VALUE tmp; + + switch (TYPE(val)) { + case T_FLOAT: + if (RFLOAT(val)->value <= (double)FIXNUM_MAX + && RFLOAT(val)->value >= (double)FIXNUM_MIN) { + break; + } + return rb_dbl2big(RFLOAT(val)->value); + + case T_FIXNUM: + case T_BIGNUM: + return val; + + case T_STRING: + return rb_str_to_inum(val, 0, Qtrue); + + default: + break; + } + tmp = convert_type(val, "Integer", "to_int", Qfalse); + if (NIL_P(tmp)) { + return 
rb_to_integer(val, "to_i"); + } + return tmp; +} + +/* + * call-seq: + * Integer(arg) => integer + * + * Converts <i>arg</i> to a <code>Fixnum</code> or <code>Bignum</code>. + * Numeric types are converted directly (with floating point numbers + * being truncated). If <i>arg</i> is a <code>String</code>, leading + * radix indicators (<code>0</code>, <code>0b</code>, and + * <code>0x</code>) are honored. Others are converted using + * <code>to_int</code> and <code>to_i</code>. This behavior is + * different from that of <code>String#to_i</code>. + * + * Integer(123.999) #=> 123 + * Integer("0x1a") #=> 26 + * Integer(Time.new) #=> 1049896590 + */ + +static VALUE +rb_f_integer(obj, arg) + VALUE obj, arg; +{ + return rb_Integer(arg); +} + +double +rb_cstr_to_dbl(p, badcheck) + const char *p; + int badcheck; +{ + const char *q; + char *end; + double d; + + if (!p) return 0.0; + q = p; + if (badcheck) { + while (ISSPACE(*p)) p++; + } + else { + while (ISSPACE(*p) || *p == '_') p++; + } + d = strtod(p, &end); + if (errno == ERANGE) { + rb_warn("Float %*s out of range", end-p, p); + errno = 0; + } + if (p == end) { + if (badcheck) { + bad: + rb_invalid_str(q, "Float()"); + } + return d; + } + if (*end) { + char *buf = ALLOCA_N(char, strlen(p)+1); + char *n = buf; + + while (p < end) *n++ = *p++; + while (*p) { + if (*p == '_') { + /* remove underscores between digits */ + if (badcheck) { + if (n == buf || !ISDIGIT(n[-1])) goto bad; + ++p; + if (!ISDIGIT(*p)) goto bad; + } + else { + while (*++p == '_'); + continue; + } + } + *n++ = *p++; + } + *n = '\0'; + p = buf; + d = strtod(p, &end); + if (errno == ERANGE) { + rb_warn("Float %*s out of range", end-p, p); + errno = 0; + } + if (badcheck) { + if (p == end) goto bad; + while (*end && ISSPACE(*end)) end++; + if (*end) goto bad; + } + } + if (errno == ERANGE) { + errno = 0; + rb_raise(rb_eArgError, "Float %s out of range", q); + } + return d; +} + +double +rb_str_to_dbl(str, badcheck) + VALUE str; + int badcheck; +{ + char 
*s; + long len; + + StringValue(str); + s = RSTRING(str)->ptr; + len = RSTRING(str)->len; + if (s) { + if (s[len]) { /* no sentinel somehow */ + char *p = ALLOCA_N(char, len+1); + + MEMCPY(p, s, char, len); + p[len] = '\0'; + s = p; + } + if (badcheck && len != strlen(s)) { + rb_raise(rb_eArgError, "string for Float contains null byte"); + } + } + return rb_cstr_to_dbl(s, badcheck); +} + +VALUE +rb_Float(val) + VALUE val; +{ + switch (TYPE(val)) { + case T_FIXNUM: + return rb_float_new((double)FIX2LONG(val)); + + case T_FLOAT: + return val; + + case T_BIGNUM: + return rb_float_new(rb_big2dbl(val)); + + case T_STRING: + return rb_float_new(rb_str_to_dbl(val, Qtrue)); + + case T_NIL: + rb_raise(rb_eTypeError, "can't convert nil into Float"); + break; + + default: + { + VALUE f = rb_convert_type(val, T_FLOAT, "Float", "to_f"); + if (isnan(RFLOAT(f)->value)) { + rb_raise(rb_eArgError, "invalid value for Float()"); + } + return f; + } + } +} + +/* + * call-seq: + * Float(arg) => float + * + * Returns <i>arg</i> converted to a float. Numeric types are converted + * directly, the rest are converted using <i>arg</i>.to_f. As of Ruby + * 1.8, converting <code>nil</code> generates a <code>TypeError</code>. 
+ * + * Float(1) #=> 1.0 + * Float("123.456") #=> 123.456 + */ + +static VALUE +rb_f_float(obj, arg) + VALUE obj, arg; +{ + return rb_Float(arg); +} + +double +rb_num2dbl(val) + VALUE val; +{ + switch (TYPE(val)) { + case T_FLOAT: + return RFLOAT(val)->value; + + case T_STRING: + rb_raise(rb_eTypeError, "no implicit conversion to float from string"); + break; + + case T_NIL: + rb_raise(rb_eTypeError, "no implicit conversion to float from nil"); + break; + + default: + break; + } + + return RFLOAT(rb_Float(val))->value; +} + +char* +rb_str2cstr(str, len) + VALUE str; + long *len; +{ + StringValue(str); + if (len) *len = RSTRING(str)->len; + else if (RTEST(ruby_verbose) && RSTRING(str)->len != strlen(RSTRING(str)->ptr)) { + rb_warn("string contains \\0 character"); + } + return RSTRING(str)->ptr; +} + +VALUE +rb_String(val) + VALUE val; +{ + return rb_convert_type(val, T_STRING, "String", "to_s"); +} + + +/* + * call-seq: + * String(arg) => string + * + * Converts <i>arg</i> to a <code>String</code> by calling its + * <code>to_s</code> method. + * + * String(self) #=> "main" + * String(self.class #=> "Object" + * String(123456) #=> "123456" + */ + +static VALUE +rb_f_string(obj, arg) + VALUE obj, arg; +{ + return rb_String(arg); +} + +VALUE +rb_Array(val) + VALUE val; +{ + VALUE tmp = rb_check_array_type(val); + + if (NIL_P(tmp)) { + tmp = rb_check_convert_type(val, T_ARRAY, "Array", "to_a"); + if (NIL_P(tmp)) { + return rb_ary_new3(1, val); + } + } + return tmp; +} + +/* + * call-seq: + * Array(arg) => array + * + * Returns <i>arg</i> as an <code>Array</code>. First tries to call + * <i>arg</i><code>.to_ary</code>, then <i>arg</i><code>.to_a</code>. + * If both fail, creates a single element array containing <i>arg</i> + * (unless <i>arg</i> is <code>nil</code>). 
+ * + * Array(1..5) #=> [1, 2, 3, 4, 5] + */ + +static VALUE +rb_f_array(obj, arg) + VALUE obj, arg; +{ + return rb_Array(arg); +} + +static VALUE +boot_defclass(name, super) + char *name; + VALUE super; +{ + extern st_table *rb_class_tbl; + VALUE obj = rb_class_boot(super); + ID id = rb_intern(name); + + rb_name_class(obj, id); + st_add_direct(rb_class_tbl, id, obj); + rb_const_set((rb_cObject ? rb_cObject : obj), id, obj); + return obj; +} + +VALUE ruby_top_self; + +/* + * Document-class: Class + * + * Classes in Ruby are first-class objects---each is an instance of + * class <code>Class</code>. + * + * When a new class is created (typically using <code>class Name ... + * end</code>), an object of type <code>Class</code> is created and + * assigned to a global constant (<code>Name</code> in this case). When + * <code>Name.new</code> is called to create a new object, the + * <code>new</code> method in <code>Class</code> is run by default. + * This can be demonstrated by overriding <code>new</code> in + * <code>Class</code>: + * + * class Class + * alias oldNew new + * def new(*args) + * print "Creating a new ", self.name, "\n" + * oldNew(*args) + * end + * end + * + * + * class Name + * end + * + * + * n = Name.new + * + * <em>produces:</em> + * + * Creating a new Name + * + * Classes, modules, and objects are interrelated. In the diagram + * that follows, the arrows represent inheritance, and the + * parentheses meta-classes. All metaclasses are instances + * of the class `Class'. + * + * +------------------+ + * | | + * Object---->(Object) | + * ^ ^ ^ ^ | + * | | | | | + * | | +-----+ +---------+ | + * | | | | | + * | +-----------+ | | + * | | | | | + * +------+ | Module--->(Module) | + * | | ^ ^ | + * OtherClass-->(OtherClass) | | | + * | | | + * Class---->(Class) | + * ^ | + * | | + * +----------------+ + */ + + +/* + * <code>Object</code> is the parent class of all classes in Ruby. 
Its + * methods are therefore available to all objects unless explicitly + * overridden. + * + * <code>Object</code> mixes in the <code>Kernel</code> module, making + * the built-in kernel functions globally accessible. Although the + * instance methods of <code>Object</code> are defined by the + * <code>Kernel</code> module, we have chosen to document them here for + * clarity. + * + * In the descriptions of Object's methods, the parameter <i>symbol</i> refers + * to a symbol, which is either a quoted string or a + * <code>Symbol</code> (such as <code>:name</code>). + */ + +void +Init_Object() +{ + VALUE metaclass; + + rb_cObject = boot_defclass("Object", 0); + rb_cModule = boot_defclass("Module", rb_cObject); + rb_cClass = boot_defclass("Class", rb_cModule); + + metaclass = rb_make_metaclass(rb_cObject, rb_cClass); + metaclass = rb_make_metaclass(rb_cModule, metaclass); + metaclass = rb_make_metaclass(rb_cClass, metaclass); + + rb_mKernel = rb_define_module("Kernel"); + rb_include_module(rb_cObject, rb_mKernel); + rb_define_alloc_func(rb_cObject, rb_class_allocate_instance); + rb_define_private_method(rb_cObject, "initialize", rb_obj_dummy, 0); + rb_define_private_method(rb_cClass, "inherited", rb_obj_dummy, 1); + rb_define_private_method(rb_cModule, "included", rb_obj_dummy, 1); + rb_define_private_method(rb_cModule, "extended", rb_obj_dummy, 1); + rb_define_private_method(rb_cModule, "method_added", rb_obj_dummy, 1); + rb_define_private_method(rb_cModule, "method_removed", rb_obj_dummy, 1); + rb_define_private_method(rb_cModule, "method_undefined", rb_obj_dummy, 1); + + + rb_define_method(rb_mKernel, "nil?", rb_false, 0); + rb_define_method(rb_mKernel, "==", rb_obj_equal, 1); + rb_define_method(rb_mKernel, "equal?", rb_obj_equal, 1); + rb_define_method(rb_mKernel, "===", rb_equal, 1); + rb_define_method(rb_mKernel, "=~", rb_obj_pattern_match, 1); + + rb_define_method(rb_mKernel, "eql?", rb_obj_equal, 1); + + rb_define_method(rb_mKernel, "hash", rb_obj_id, 0); + 
rb_define_method(rb_mKernel, "__id__", rb_obj_id, 0); + rb_define_method(rb_mKernel, "object_id", rb_obj_id, 0); + rb_define_method(rb_mKernel, "class", rb_obj_class, 0); + + rb_define_method(rb_mKernel, "clone", rb_obj_clone, 0); + rb_define_method(rb_mKernel, "dup", rb_obj_dup, 0); + rb_define_method(rb_mKernel, "initialize_copy", rb_obj_init_copy, 1); + + rb_define_method(rb_mKernel, "taint", rb_obj_taint, 0); + rb_define_method(rb_mKernel, "tainted?", rb_obj_tainted, 0); + rb_define_method(rb_mKernel, "untaint", rb_obj_untaint, 0); + rb_define_method(rb_mKernel, "freeze", rb_obj_freeze, 0); + rb_define_method(rb_mKernel, "frozen?", rb_obj_frozen_p, 0); + + rb_define_method(rb_mKernel, "to_s", rb_any_to_s, 0); + rb_define_method(rb_mKernel, "inspect", rb_obj_inspect, 0); + rb_define_method(rb_mKernel, "methods", rb_obj_methods, -1); + rb_define_method(rb_mKernel, "singleton_methods", rb_obj_singleton_methods, -1); /* in class.c */ + rb_define_method(rb_mKernel, "protected_methods", rb_obj_protected_methods, -1); + rb_define_method(rb_mKernel, "private_methods", rb_obj_private_methods, -1); + rb_define_method(rb_mKernel, "public_methods", rb_obj_public_methods, -1); + rb_define_method(rb_mKernel, "instance_variables", rb_obj_instance_variables, 0); /* in variable.c */ + rb_define_method(rb_mKernel, "instance_variable_get", rb_obj_ivar_get, 1); + rb_define_method(rb_mKernel, "instance_variable_set", rb_obj_ivar_set, 2); + rb_define_private_method(rb_mKernel, "remove_instance_variable", + rb_obj_remove_instance_variable, 1); /* in variable.c */ + + rb_define_method(rb_mKernel, "instance_of?", rb_obj_is_instance_of, 1); + rb_define_method(rb_mKernel, "kind_of?", rb_obj_is_kind_of, 1); + rb_define_method(rb_mKernel, "is_a?", rb_obj_is_kind_of, 1); + + rb_define_private_method(rb_mKernel, "singleton_method_added", rb_obj_dummy, 1); + rb_define_private_method(rb_mKernel, "singleton_method_removed", rb_obj_dummy, 1); + rb_define_private_method(rb_mKernel, 
"singleton_method_undefined", rb_obj_dummy, 1); + + rb_define_global_function("sprintf", rb_f_sprintf, -1); /* in sprintf.c */ + rb_define_global_function("format", rb_f_sprintf, -1); /* in sprintf.c */ + + rb_define_global_function("Integer", rb_f_integer, 1); + rb_define_global_function("Float", rb_f_float, 1); + + rb_define_global_function("String", rb_f_string, 1); + rb_define_global_function("Array", rb_f_array, 1); + + rb_cNilClass = rb_define_class("NilClass", rb_cObject); + rb_define_method(rb_cNilClass, "to_i", nil_to_i, 0); + rb_define_method(rb_cNilClass, "to_f", nil_to_f, 0); + rb_define_method(rb_cNilClass, "to_s", nil_to_s, 0); + rb_define_method(rb_cNilClass, "to_a", nil_to_a, 0); + rb_define_method(rb_cNilClass, "inspect", nil_inspect, 0); + rb_define_method(rb_cNilClass, "&", false_and, 1); + rb_define_method(rb_cNilClass, "|", false_or, 1); + rb_define_method(rb_cNilClass, "^", false_xor, 1); + + rb_define_method(rb_cNilClass, "nil?", rb_true, 0); + rb_undef_alloc_func(rb_cNilClass); + rb_undef_method(CLASS_OF(rb_cNilClass), "new"); + rb_define_global_const("NIL", Qnil); + + rb_cSymbol = rb_define_class("Symbol", rb_cObject); + rb_define_singleton_method(rb_cSymbol, "all_symbols", rb_sym_all_symbols, 0); /* in parse.y */ + rb_undef_alloc_func(rb_cSymbol); + rb_undef_method(CLASS_OF(rb_cSymbol), "new"); + + rb_define_method(rb_cSymbol, "to_i", sym_to_i, 0); + rb_define_method(rb_cSymbol, "inspect", sym_inspect, 0); + rb_define_method(rb_cSymbol, "to_s", sym_to_s, 0); + rb_define_method(rb_cSymbol, "id2name", sym_to_s, 0); + rb_define_method(rb_cSymbol, "to_sym", sym_to_sym, 0); + rb_define_method(rb_cSymbol, "===", rb_obj_equal, 1); + + rb_define_method(rb_cModule, "freeze", rb_mod_freeze, 0); + rb_define_method(rb_cModule, "===", rb_mod_eqq, 1); + rb_define_method(rb_cModule, "==", rb_obj_equal, 1); + rb_define_method(rb_cModule, "<=>", rb_mod_cmp, 1); + rb_define_method(rb_cModule, "<", rb_mod_lt, 1); + rb_define_method(rb_cModule, "<=", 
rb_class_inherited_p, 1); + rb_define_method(rb_cModule, ">", rb_mod_gt, 1); + rb_define_method(rb_cModule, ">=", rb_mod_ge, 1); + rb_define_method(rb_cModule, "initialize_copy", rb_mod_init_copy, 1); /* in class.c */ + rb_define_method(rb_cModule, "to_s", rb_mod_to_s, 0); + rb_define_method(rb_cModule, "included_modules", rb_mod_included_modules, 0); /* in class.c */ + rb_define_method(rb_cModule, "include?", rb_mod_include_p, 1); /* in class.c */ + rb_define_method(rb_cModule, "name", rb_mod_name, 0); /* in variable.c */ + rb_define_method(rb_cModule, "ancestors", rb_mod_ancestors, 0); /* in class.c */ + + rb_define_private_method(rb_cModule, "attr", rb_mod_attr, -1); + rb_define_private_method(rb_cModule, "attr_reader", rb_mod_attr_reader, -1); + rb_define_private_method(rb_cModule, "attr_writer", rb_mod_attr_writer, -1); + rb_define_private_method(rb_cModule, "attr_accessor", rb_mod_attr_accessor, -1); + + rb_define_alloc_func(rb_cModule, rb_module_s_alloc); + rb_define_method(rb_cModule, "initialize", rb_mod_initialize, 0); + rb_define_method(rb_cModule, "instance_methods", rb_class_instance_methods, -1); /* in class.c */ + rb_define_method(rb_cModule, "public_instance_methods", + rb_class_public_instance_methods, -1); /* in class.c */ + rb_define_method(rb_cModule, "protected_instance_methods", + rb_class_protected_instance_methods, -1); /* in class.c */ + rb_define_method(rb_cModule, "private_instance_methods", + rb_class_private_instance_methods, -1); /* in class.c */ + + rb_define_method(rb_cModule, "constants", rb_mod_constants, 0); /* in variable.c */ + rb_define_method(rb_cModule, "const_get", rb_mod_const_get, 1); + rb_define_method(rb_cModule, "const_set", rb_mod_const_set, 2); + rb_define_method(rb_cModule, "const_defined?", rb_mod_const_defined, 1); + rb_define_private_method(rb_cModule, "remove_const", + rb_mod_remove_const, 1); /* in variable.c */ + rb_define_method(rb_cModule, "const_missing", + rb_mod_const_missing, 1); /* in variable.c */ + 
rb_define_method(rb_cModule, "class_variables", + rb_mod_class_variables, 0); /* in variable.c */ + rb_define_private_method(rb_cModule, "remove_class_variable", + rb_mod_remove_cvar, 1); /* in variable.c */ + rb_define_private_method(rb_cModule, "class_variable_get", rb_mod_cvar_get, 1); + rb_define_private_method(rb_cModule, "class_variable_set", rb_mod_cvar_set, 2); + + rb_define_method(rb_cClass, "allocate", rb_obj_alloc, 0); + rb_define_method(rb_cClass, "new", rb_class_new_instance, -1); + rb_define_method(rb_cClass, "initialize", rb_class_initialize, -1); + rb_define_method(rb_cClass, "initialize_copy", rb_class_init_copy, 1); /* in class.c */ + rb_define_method(rb_cClass, "superclass", rb_class_superclass, 0); + rb_define_alloc_func(rb_cClass, rb_class_s_alloc); + rb_undef_method(rb_cClass, "extend_object"); + rb_undef_method(rb_cClass, "append_features"); + + rb_cData = rb_define_class("Data", rb_cObject); + rb_undef_alloc_func(rb_cData); + + ruby_top_self = rb_obj_alloc(rb_cObject); + rb_global_variable(&ruby_top_self); + rb_define_singleton_method(ruby_top_self, "to_s", main_to_s, 0); + + rb_cTrueClass = rb_define_class("TrueClass", rb_cObject); + rb_define_method(rb_cTrueClass, "to_s", true_to_s, 0); + rb_define_method(rb_cTrueClass, "&", true_and, 1); + rb_define_method(rb_cTrueClass, "|", true_or, 1); + rb_define_method(rb_cTrueClass, "^", true_xor, 1); + rb_undef_alloc_func(rb_cTrueClass); + rb_undef_method(CLASS_OF(rb_cTrueClass), "new"); + rb_define_global_const("TRUE", Qtrue); + + rb_cFalseClass = rb_define_class("FalseClass", rb_cObject); + rb_define_method(rb_cFalseClass, "to_s", false_to_s, 0); + rb_define_method(rb_cFalseClass, "&", false_and, 1); + rb_define_method(rb_cFalseClass, "|", false_or, 1); + rb_define_method(rb_cFalseClass, "^", false_xor, 1); + rb_undef_alloc_func(rb_cFalseClass); + rb_undef_method(CLASS_OF(rb_cFalseClass), "new"); + rb_define_global_const("FALSE", Qfalse); + + id_eq = rb_intern("=="); + id_eql = rb_intern("eql?"); 
+ id_inspect = rb_intern("inspect"); + id_init_copy = rb_intern("initialize_copy"); +} +/********************************************************************** + + pack.c - + + $Author: matz $ + $Date: 2005/03/04 06:47:42 $ + created at: Thu Feb 10 15:17:05 JST 1994 + + Copyright (C) 1993-2003 Yukihiro Matsumoto + +**********************************************************************/ + +#include "ruby.h" +#include <sys/types.h> +#include <ctype.h> + +#define SIZE16 2 +#define SIZE32 4 + +#if SIZEOF_SHORT != 2 || SIZEOF_LONG != 4 +# define NATINT_PACK +#endif + +#ifdef NATINT_PACK +# define OFF16B(p) ((char*)(p) + (natint?0:(sizeof(short) - SIZE16))) +# define OFF32B(p) ((char*)(p) + (natint?0:(sizeof(long) - SIZE32))) +# define NATINT_LEN(type,len) (natint?sizeof(type):(len)) +# ifdef WORDS_BIGENDIAN +# define OFF16(p) OFF16B(p) +# define OFF32(p) OFF32B(p) +# endif +# define NATINT_HTOVS(x) (natint?htovs(x):htov16(x)) +# define NATINT_HTOVL(x) (natint?htovl(x):htov32(x)) +# define NATINT_HTONS(x) (natint?htons(x):hton16(x)) +# define NATINT_HTONL(x) (natint?htonl(x):hton32(x)) +#else +# define NATINT_LEN(type,len) sizeof(type) +# define NATINT_HTOVS(x) htovs(x) +# define NATINT_HTOVL(x) htovl(x) +# define NATINT_HTONS(x) htons(x) +# define NATINT_HTONL(x) htonl(x) +#endif + +#ifndef OFF16 +# define OFF16(p) (char*)(p) +# define OFF32(p) (char*)(p) +#endif +#ifndef OFF16B +# define OFF16B(p) (char*)(p) +# define OFF32B(p) (char*)(p) +#endif + +#define define_swapx(x, xtype) \ +static xtype \ +TOKEN_PASTE(swap,x)(z) \ + xtype z; \ +{ \ + xtype r; \ + xtype *zp; \ + unsigned char *s, *t; \ + int i; \ + \ + zp = malloc(sizeof(xtype)); \ + *zp = z; \ + s = (unsigned char*)zp; \ + t = malloc(sizeof(xtype)); \ + for (i=0; i<sizeof(xtype); i++) { \ + t[sizeof(xtype)-i-1] = s[i]; \ + } \ + r = *(xtype *)t; \ + free(t); \ + free(zp); \ + return r; \ +} + +#ifndef swap16 +#define swap16(x) ((((x)&0xFF)<<8) | (((x)>>8)&0xFF)) +#endif +#if SIZEOF_SHORT == 2 +#define swaps(x) 
swap16(x) +#else +#if SIZEOF_SHORT == 4 +#define swaps(x) ((((x)&0xFF)<<24) \ + |(((x)>>24)&0xFF) \ + |(((x)&0x0000FF00)<<8) \ + |(((x)&0x00FF0000)>>8) ) +#else +define_swapx(s,short) +#endif +#endif + +#ifndef swap32 +#define swap32(x) ((((x)&0xFF)<<24) \ + |(((x)>>24)&0xFF) \ + |(((x)&0x0000FF00)<<8) \ + |(((x)&0x00FF0000)>>8) ) +#endif +#if SIZEOF_LONG == 4 +#define swapl(x) swap32(x) +#else +#if SIZEOF_LONG == 8 +#define swapl(x) ((((x)&0x00000000000000FF)<<56) \ + |(((x)&0xFF00000000000000)>>56) \ + |(((x)&0x000000000000FF00)<<40) \ + |(((x)&0x00FF000000000000)>>40) \ + |(((x)&0x0000000000FF0000)<<24) \ + |(((x)&0x0000FF0000000000)>>24) \ + |(((x)&0x00000000FF000000)<<8) \ + |(((x)&0x000000FF00000000)>>8)) +#else +define_swapx(l,long) +#endif +#endif + +#if SIZEOF_FLOAT == 4 +#if SIZEOF_LONG == 4 /* SIZEOF_FLOAT == 4 == SIZEOF_LONG */ +#define swapf(x) swapl(x) +#define FLOAT_SWAPPER unsigned long +#else +#if SIZEOF_SHORT == 4 /* SIZEOF_FLOAT == 4 == SIZEOF_SHORT */ +#define swapf(x) swaps(x) +#define FLOAT_SWAPPER unsigned short +#else /* SIZEOF_FLOAT == 4 but undivide by known size of int */ +define_swapx(f,float) +#endif /* #if SIZEOF_SHORT == 4 */ +#endif /* #if SIZEOF_LONG == 4 */ +#else /* SIZEOF_FLOAT != 4 */ +define_swapx(f,float) +#endif /* #if SIZEOF_FLOAT == 4 */ + +#if SIZEOF_DOUBLE == 8 +#if SIZEOF_LONG == 8 /* SIZEOF_DOUBLE == 8 == SIZEOF_LONG */ +#define swapd(x) swapl(x) +#define DOUBLE_SWAPPER unsigned long +#else +#if SIZEOF_LONG == 4 /* SIZEOF_DOUBLE == 8 && 4 == SIZEOF_LONG */ +static double +swapd(d) + const double d; +{ + double dtmp = d; + unsigned long utmp[2]; + unsigned long utmp0; + + utmp[0] = 0; utmp[1] = 0; + memcpy(utmp,&dtmp,sizeof(double)); + utmp0 = utmp[0]; + utmp[0] = swapl(utmp[1]); + utmp[1] = swapl(utmp0); + memcpy(&dtmp,utmp,sizeof(double)); + return dtmp; +} +#else +#if SIZEOF_SHORT == 4 /* SIZEOF_DOUBLE == 8 && 4 == SIZEOF_SHORT */ +static double +swapd(d) + const double d; +{ + double dtmp = d; + unsigned short 
utmp[2]; + unsigned short utmp0; + + utmp[0] = 0; utmp[1] = 0; + memcpy(utmp,&dtmp,sizeof(double)); + utmp0 = utmp[0]; + utmp[0] = swaps(utmp[1]); + utmp[1] = swaps(utmp0); + memcpy(&dtmp,utmp,sizeof(double)); + return dtmp; +} +#else /* SIZEOF_DOUBLE == 8 but undivied by known size of int */ +define_swapx(d, double) +#endif /* #if SIZEOF_SHORT == 4 */ +#endif /* #if SIZEOF_LONG == 4 */ +#endif /* #if SIZEOF_LONG == 8 */ +#else /* SIZEOF_DOUBLE != 8 */ +define_swapx(d, double) +#endif /* #if SIZEOF_DOUBLE == 8 */ + +#undef define_swapx + +#ifdef DYNAMIC_ENDIAN +#ifdef ntohs +#undef ntohs +#undef ntohl +#undef htons +#undef htonl +#endif +static int +endian() +{ + static int init = 0; + static int endian_value; + char *p; + + if (init) return endian_value; + init = 1; + p = (char*)&init; + return endian_value = p[0]?0:1; +} + +#define ntohs(x) (endian()?(x):swaps(x)) +#define ntohl(x) (endian()?(x):swapl(x)) +#define ntohf(x) (endian()?(x):swapf(x)) +#define ntohd(x) (endian()?(x):swapd(x)) +#define htons(x) (endian()?(x):swaps(x)) +#define htonl(x) (endian()?(x):swapl(x)) +#define htonf(x) (endian()?(x):swapf(x)) +#define htond(x) (endian()?(x):swapd(x)) +#define htovs(x) (endian()?swaps(x):(x)) +#define htovl(x) (endian()?swapl(x):(x)) +#define htovf(x) (endian()?swapf(x):(x)) +#define htovd(x) (endian()?swapd(x):(x)) +#define vtohs(x) (endian()?swaps(x):(x)) +#define vtohl(x) (endian()?swapl(x):(x)) +#define vtohf(x) (endian()?swapf(x):(x)) +#define vtohd(x) (endian()?swapd(x):(x)) +# ifdef NATINT_PACK +#define htov16(x) (endian()?swap16(x):(x)) +#define htov32(x) (endian()?swap32(x):(x)) +#define hton16(x) (endian()?(x):swap16(x)) +#define hton32(x) (endian()?(x):swap32(x)) +# endif +#else +#ifdef WORDS_BIGENDIAN +#ifndef ntohs +#define ntohs(x) (x) +#define ntohl(x) (x) +#define htons(x) (x) +#define htonl(x) (x) +#endif +#define ntohf(x) (x) +#define ntohd(x) (x) +#define htonf(x) (x) +#define htond(x) (x) +#define htovs(x) swaps(x) +#define htovl(x) swapl(x) 
+#define htovf(x) swapf(x) +#define htovd(x) swapd(x) +#define vtohs(x) swaps(x) +#define vtohl(x) swapl(x) +#define vtohf(x) swapf(x) +#define vtohd(x) swapd(x) +# ifdef NATINT_PACK +#define htov16(x) swap16(x) +#define htov32(x) swap32(x) +#define hton16(x) (x) +#define hton32(x) (x) +# endif +#else /* LITTLE ENDIAN */ +#ifdef ntohs +#undef ntohs +#undef ntohl +#undef htons +#undef htonl +#endif +#define ntohs(x) swaps(x) +#define ntohl(x) swapl(x) +#define htons(x) swaps(x) +#define htonl(x) swapl(x) +#define ntohf(x) swapf(x) +#define ntohd(x) swapd(x) +#define htonf(x) swapf(x) +#define htond(x) swapd(x) +#define htovs(x) (x) +#define htovl(x) (x) +#define htovf(x) (x) +#define htovd(x) (x) +#define vtohs(x) (x) +#define vtohl(x) (x) +#define vtohf(x) (x) +#define vtohd(x) (x) +# ifdef NATINT_PACK +#define htov16(x) (x) +#define htov32(x) (x) +#define hton16(x) swap16(x) +#define hton32(x) swap32(x) +# endif +#endif +#endif + +#ifdef FLOAT_SWAPPER +#define FLOAT_CONVWITH(y) FLOAT_SWAPPER y; +#define HTONF(x,y) (memcpy(&y,&x,sizeof(float)), \ + y = htonf((FLOAT_SWAPPER)y), \ + memcpy(&x,&y,sizeof(float)), \ + x) +#define HTOVF(x,y) (memcpy(&y,&x,sizeof(float)), \ + y = htovf((FLOAT_SWAPPER)y), \ + memcpy(&x,&y,sizeof(float)), \ + x) +#define NTOHF(x,y) (memcpy(&y,&x,sizeof(float)), \ + y = ntohf((FLOAT_SWAPPER)y), \ + memcpy(&x,&y,sizeof(float)), \ + x) +#define VTOHF(x,y) (memcpy(&y,&x,sizeof(float)), \ + y = vtohf((FLOAT_SWAPPER)y), \ + memcpy(&x,&y,sizeof(float)), \ + x) +#else +#define FLOAT_CONVWITH(y) +#define HTONF(x,y) htonf(x) +#define HTOVF(x,y) htovf(x) +#define NTOHF(x,y) ntohf(x) +#define VTOHF(x,y) vtohf(x) +#endif + +#ifdef DOUBLE_SWAPPER +#define DOUBLE_CONVWITH(y) DOUBLE_SWAPPER y; +#define HTOND(x,y) (memcpy(&y,&x,sizeof(double)), \ + y = htond((DOUBLE_SWAPPER)y), \ + memcpy(&x,&y,sizeof(double)), \ + x) +#define HTOVD(x,y) (memcpy(&y,&x,sizeof(double)), \ + y = htovd((DOUBLE_SWAPPER)y), \ + memcpy(&x,&y,sizeof(double)), \ + x) +#define 
NTOHD(x,y) (memcpy(&y,&x,sizeof(double)), \ + y = ntohd((DOUBLE_SWAPPER)y), \ + memcpy(&x,&y,sizeof(double)), \ + x) +#define VTOHD(x,y) (memcpy(&y,&x,sizeof(double)), \ + y = vtohd((DOUBLE_SWAPPER)y), \ + memcpy(&x,&y,sizeof(double)), \ + x) +#else +#define DOUBLE_CONVWITH(y) +#define HTOND(x,y) htond(x) +#define HTOVD(x,y) htovd(x) +#define NTOHD(x,y) ntohd(x) +#define VTOHD(x,y) vtohd(x) +#endif + +unsigned long rb_big2ulong_pack _((VALUE x)); + +static unsigned long +num2i32(x) + VALUE x; +{ + x = rb_to_int(x); /* is nil OK? (should not) */ + + if (FIXNUM_P(x)) return FIX2LONG(x); + if (TYPE(x) == T_BIGNUM) { + return rb_big2ulong_pack(x); + } + rb_raise(rb_eTypeError, "can't convert %s to `integer'", rb_obj_classname(x)); + return 0; /* not reached */ +} + +#if SIZEOF_LONG == SIZE32 || SIZEOF_INT == SIZE32 +# define EXTEND32(x) +#else +/* invariant in modulo 1<<31 */ +# define EXTEND32(x) do {if (!natint) {(x) = (I32)(((1<<31)-1-(x))^~(~0<<31))}} while(0) +#endif +#if SIZEOF_SHORT == SIZE16 +# define EXTEND16(x) +#else +# define EXTEND16(x) do { if (!natint) {(x) = (short)(((1<<15)-1-(x))^~(~0<<15))}} while(0) +#endif + +#ifdef HAVE_LONG_LONG +# define QUAD_SIZE sizeof(LONG_LONG) +#else +# define QUAD_SIZE 8 +#endif +static char *toofew = "too few arguments"; + +static void encodes _((VALUE,char*,long,int)); +static void qpencode _((VALUE,VALUE,long)); + +static int uv_to_utf8 _((char*,unsigned long)); +static unsigned long utf8_to_uv _((char*,long*)); + +/* + * call-seq: + * arr.pack ( aTemplateString ) -> aBinaryString + * + * Packs the contents of <i>arr</i> into a binary sequence according to + * the directives in <i>aTemplateString</i> (see the table below) + * Directives ``A,'' ``a,'' and ``Z'' may be followed by a count, + * which gives the width of the resulting field. The remaining + * directives also may take a count, indicating the number of array + * elements to convert. 
If the count is an asterisk + * (``<code>*</code>''), all remaining array elements will be + * converted. Any of the directives ``<code>sSiIlL</code>'' may be + * followed by an underscore (``<code>_</code>'') to use the underlying + * platform's native size for the specified type; otherwise, they use a + * platform-independent size. Spaces are ignored in the template + * string. See also <code>String#unpack</code>. + * + * a = [ "a", "b", "c" ] + * n = [ 65, 66, 67 ] + * a.pack("A3A3A3") #=> "a b c " + * a.pack("a3a3a3") #=> "a\000\000b\000\000c\000\000" + * n.pack("ccc") #=> "ABC" + * + * Directives for +pack+. + * + * Directive Meaning + * --------------------------------------------------------------- + * @ | Moves to absolute position + * A | ASCII string (space padded, count is width) + * a | ASCII string (null padded, count is width) + * B | Bit string (descending bit order) + * b | Bit string (ascending bit order) + * C | Unsigned char + * c | Char + * D, d | Double-precision float, native format + * E | Double-precision float, little-endian byte order + * e | Single-precision float, little-endian byte order + * F, f | Single-precision float, native format + * G | Double-precision float, network (big-endian) byte order + * g | Single-precision float, network (big-endian) byte order + * H | Hex string (high nibble first) + * h | Hex string (low nibble first) + * I | Unsigned integer + * i | Integer + * L | Unsigned long + * l | Long + * M | Quoted printable, MIME encoding (see RFC2045) + * m | Base64 encoded string + * N | Long, network (big-endian) byte order + * n | Short, network (big-endian) byte-order + * P | Pointer to a structure (fixed-length string) + * p | Pointer to a null-terminated string + * Q, q | 64-bit number + * S | Unsigned short + * s | Short + * U | UTF-8 + * u | UU-encoded string + * V | Long, little-endian byte order + * v | Short, little-endian byte order + * w | BER-compressed integer\fnm + * X | Back up a byte + * x | Null byte + * 
Z | Same as ``a'', except that null is added with * + */ + +static VALUE +pack_pack(ary, fmt) + VALUE ary, fmt; +{ + static char *nul10 = "\0\0\0\0\0\0\0\0\0\0"; + static char *spc10 = " "; + char *p, *pend; + VALUE res, from, associates = 0; + char type; + long items, len, idx, plen; + char *ptr; +#ifdef NATINT_PACK + int natint; /* native integer */ +#endif + + StringValue(fmt); + p = RSTRING(fmt)->ptr; + pend = p + RSTRING(fmt)->len; + res = rb_str_buf_new(0); + + items = RARRAY(ary)->len; + idx = 0; + +#define THISFROM RARRAY(ary)->ptr[idx] +#define NEXTFROM (items-- > 0 ? RARRAY(ary)->ptr[idx++] : (rb_raise(rb_eArgError, toofew),0)) + + while (p < pend) { + if (RSTRING(fmt)->ptr + RSTRING(fmt)->len != pend) { + rb_raise(rb_eRuntimeError, "format string modified"); + } + type = *p++; /* get data type */ +#ifdef NATINT_PACK + natint = 0; +#endif + + if (ISSPACE(type)) continue; + if (type == '#') { + while ((p < pend) && (*p != '\n')) { + p++; + } + continue; + } + if (*p == '_' || *p == '!') { + const char *natstr = "sSiIlL"; + + if (strchr(natstr, type)) { +#ifdef NATINT_PACK + natint = 1; +#endif + p++; + } + else { + rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr); + } + } + if (*p == '*') { /* set data length */ + len = strchr("@Xxu", type) ? 
0 : items; + p++; + } + else if (ISDIGIT(*p)) { + len = strtoul(p, (char**)&p, 10); + } + else { + len = 1; + } + + switch (type) { + case 'A': case 'a': case 'Z': + case 'B': case 'b': + case 'H': case 'h': + from = NEXTFROM; + if (NIL_P(from)) { + ptr = ""; + plen = 0; + } + else { + StringValue(from); + ptr = RSTRING(from)->ptr; + plen = RSTRING(from)->len; + OBJ_INFECT(res, from); + } + + if (p[-1] == '*') + len = plen; + + switch (type) { + case 'a': /* arbitrary binary string (null padded) */ + case 'A': /* ASCII string (space padded) */ + case 'Z': /* null terminated ASCII string */ + if (plen >= len) { + rb_str_buf_cat(res, ptr, len); + if (p[-1] == '*' && type == 'Z') + rb_str_buf_cat(res, nul10, 1); + } + else { + rb_str_buf_cat(res, ptr, plen); + len -= plen; + while (len >= 10) { + rb_str_buf_cat(res, (type == 'A')?spc10:nul10, 10); + len -= 10; + } + rb_str_buf_cat(res, (type == 'A')?spc10:nul10, len); + } + break; + + case 'b': /* bit string (ascending) */ + { + int byte = 0; + long i, j = 0; + + if (len > plen) { + j = (len - plen + 1)/2; + len = plen; + } + for (i=0; i++ < len; ptr++) { + if (*ptr & 1) + byte |= 128; + if (i & 7) + byte >>= 1; + else { + char c = byte & 0xff; + rb_str_buf_cat(res, &c, 1); + byte = 0; + } + } + if (len & 7) { + char c; + byte >>= 7 - (len & 7); + c = byte & 0xff; + rb_str_buf_cat(res, &c, 1); + } + len = j; + goto grow; + } + break; + + case 'B': /* bit string (descending) */ + { + int byte = 0; + long i, j = 0; + + if (len > plen) { + j = (len - plen + 1)/2; + len = plen; + } + for (i=0; i++ < len; ptr++) { + byte |= *ptr & 1; + if (i & 7) + byte <<= 1; + else { + char c = byte & 0xff; + rb_str_buf_cat(res, &c, 1); + byte = 0; + } + } + if (len & 7) { + char c; + byte <<= 7 - (len & 7); + c = byte & 0xff; + rb_str_buf_cat(res, &c, 1); + } + len = j; + goto grow; + } + break; + + case 'h': /* hex string (low nibble first) */ + { + int byte = 0; + long i, j = 0; + + if (len > plen) { + j = (len - plen + 1)/2; + len = 
plen; + } + for (i=0; i++ < len; ptr++) { + if (ISALPHA(*ptr)) + byte |= (((*ptr & 15) + 9) & 15) << 4; + else + byte |= (*ptr & 15) << 4; + if (i & 1) + byte >>= 4; + else { + char c = byte & 0xff; + rb_str_buf_cat(res, &c, 1); + byte = 0; + } + } + if (len & 1) { + char c = byte & 0xff; + rb_str_buf_cat(res, &c, 1); + } + len = j; + goto grow; + } + break; + + case 'H': /* hex string (high nibble first) */ + { + int byte = 0; + long i, j = 0; + + if (len > plen) { + j = (len - plen + 1)/2; + len = plen; + } + for (i=0; i++ < len; ptr++) { + if (ISALPHA(*ptr)) + byte |= ((*ptr & 15) + 9) & 15; + else + byte |= *ptr & 15; + if (i & 1) + byte <<= 4; + else { + char c = byte & 0xff; + rb_str_buf_cat(res, &c, 1); + byte = 0; + } + } + if (len & 1) { + char c = byte & 0xff; + rb_str_buf_cat(res, &c, 1); + } + len = j; + goto grow; + } + break; + } + break; + + case 'c': /* signed char */ + case 'C': /* unsigned char */ + while (len-- > 0) { + char c; + + from = NEXTFROM; + c = num2i32(from); + rb_str_buf_cat(res, &c, sizeof(char)); + } + break; + + case 's': /* signed short */ + case 'S': /* unsigned short */ + while (len-- > 0) { + short s; + + from = NEXTFROM; + s = num2i32(from); + rb_str_buf_cat(res, OFF16(&s), NATINT_LEN(short,2)); + } + break; + + case 'i': /* signed int */ + case 'I': /* unsigned int */ + while (len-- > 0) { + long i; + + from = NEXTFROM; + i = num2i32(from); + rb_str_buf_cat(res, OFF32(&i), NATINT_LEN(int,4)); + } + break; + + case 'l': /* signed long */ + case 'L': /* unsigned long */ + while (len-- > 0) { + long l; + + from = NEXTFROM; + l = num2i32(from); + rb_str_buf_cat(res, OFF32(&l), NATINT_LEN(long,4)); + } + break; + + case 'q': /* signed quad (64bit) int */ + case 'Q': /* unsigned quad (64bit) int */ + while (len-- > 0) { + char tmp[QUAD_SIZE]; + + from = NEXTFROM; + rb_quad_pack(tmp, from); + rb_str_buf_cat(res, (char*)&tmp, QUAD_SIZE); + } + break; + + case 'n': /* unsigned short (network byte-order) */ + while (len-- > 0) { + 
unsigned short s; + + from = NEXTFROM; + s = num2i32(from); + s = NATINT_HTONS(s); + rb_str_buf_cat(res, OFF16(&s), NATINT_LEN(short,2)); + } + break; + + case 'N': /* unsigned long (network byte-order) */ + while (len-- > 0) { + unsigned long l; + + from = NEXTFROM; + l = num2i32(from); + l = NATINT_HTONL(l); + rb_str_buf_cat(res, OFF32(&l), NATINT_LEN(long,4)); + } + break; + + case 'v': /* unsigned short (VAX byte-order) */ + while (len-- > 0) { + unsigned short s; + + from = NEXTFROM; + s = num2i32(from); + s = NATINT_HTOVS(s); + rb_str_buf_cat(res, OFF16(&s), NATINT_LEN(short,2)); + } + break; + + case 'V': /* unsigned long (VAX byte-order) */ + while (len-- > 0) { + unsigned long l; + + from = NEXTFROM; + l = num2i32(from); + l = NATINT_HTOVL(l); + rb_str_buf_cat(res, OFF32(&l), NATINT_LEN(long,4)); + } + break; + + case 'f': /* single precision float in native format */ + case 'F': /* ditto */ + while (len-- > 0) { + float f; + + from = NEXTFROM; + f = RFLOAT(rb_Float(from))->value; + rb_str_buf_cat(res, (char*)&f, sizeof(float)); + } + break; + + case 'e': /* single precision float in VAX byte-order */ + while (len-- > 0) { + float f; + FLOAT_CONVWITH(ftmp); + + from = NEXTFROM; + f = RFLOAT(rb_Float(from))->value; + f = HTOVF(f,ftmp); + rb_str_buf_cat(res, (char*)&f, sizeof(float)); + } + break; + + case 'E': /* double precision float in VAX byte-order */ + while (len-- > 0) { + double d; + DOUBLE_CONVWITH(dtmp); + + from = NEXTFROM; + d = RFLOAT(rb_Float(from))->value; + d = HTOVD(d,dtmp); + rb_str_buf_cat(res, (char*)&d, sizeof(double)); + } + break; + + case 'd': /* double precision float in native format */ + case 'D': /* ditto */ + while (len-- > 0) { + double d; + + from = NEXTFROM; + d = RFLOAT(rb_Float(from))->value; + rb_str_buf_cat(res, (char*)&d, sizeof(double)); + } + break; + + case 'g': /* single precision float in network byte-order */ + while (len-- > 0) { + float f; + FLOAT_CONVWITH(ftmp); + + from = NEXTFROM; + f = 
RFLOAT(rb_Float(from))->value; + f = HTONF(f,ftmp); + rb_str_buf_cat(res, (char*)&f, sizeof(float)); + } + break; + + case 'G': /* double precision float in network byte-order */ + while (len-- > 0) { + double d; + DOUBLE_CONVWITH(dtmp); + + from = NEXTFROM; + d = RFLOAT(rb_Float(from))->value; + d = HTOND(d,dtmp); + rb_str_buf_cat(res, (char*)&d, sizeof(double)); + } + break; + + case 'x': /* null byte */ + grow: + while (len >= 10) { + rb_str_buf_cat(res, nul10, 10); + len -= 10; + } + rb_str_buf_cat(res, nul10, len); + break; + + case 'X': /* back up byte */ + shrink: + plen = RSTRING(res)->len; + if (plen < len) + rb_raise(rb_eArgError, "X outside of string"); + RSTRING(res)->len = plen - len; + RSTRING(res)->ptr[plen - len] = '\0'; + break; + + case '@': /* null fill to absolute position */ + len -= RSTRING(res)->len; + if (len > 0) goto grow; + len = -len; + if (len > 0) goto shrink; + break; + + case '%': + rb_raise(rb_eArgError, "%% is not supported"); + break; + + case 'U': /* Unicode character */ + while (len-- > 0) { + long l; + char buf[8]; + int le; + + from = NEXTFROM; + from = rb_to_int(from); + l = NUM2INT(from); + if (l < 0) { + rb_raise(rb_eRangeError, "pack(U): value out of range"); + } + le = uv_to_utf8(buf, l); + rb_str_buf_cat(res, (char*)buf, le); + } + break; + + case 'u': /* uuencoded string */ + case 'm': /* base64 encoded string */ + from = NEXTFROM; + StringValue(from); + ptr = RSTRING(from)->ptr; + plen = RSTRING(from)->len; + + if (len <= 2) + len = 45; + else + len = len / 3 * 3; + while (plen > 0) { + long todo; + + if (plen > len) + todo = len; + else + todo = plen; + encodes(res, ptr, todo, type); + plen -= todo; + ptr += todo; + } + break; + + case 'M': /* quoted-printable encoded string */ + from = rb_obj_as_string(NEXTFROM); + if (len <= 1) + len = 72; + qpencode(res, from, len); + break; + + case 'P': /* pointer to packed byte string */ + from = THISFROM; + if (!NIL_P(from)) { + StringValue(from); + if (RSTRING(from)->len < 
len) { + rb_raise(rb_eArgError, "too short buffer for P(%ld for %ld)", + RSTRING(from)->len, len); + } + } + len = 1; + /* FALL THROUGH */ + case 'p': /* pointer to string */ + while (len-- > 0) { + char *t; + from = NEXTFROM; + if (NIL_P(from)) { + t = 0; + } + else { + t = StringValuePtr(from); + } + if (!associates) { + associates = rb_ary_new(); + } + rb_ary_push(associates, from); + rb_str_buf_cat(res, (char*)&t, sizeof(char*)); + } + break; + + case 'w': /* BER compressed integer */ + while (len-- > 0) { + unsigned long ul; + VALUE buf = rb_str_new(0, 0); + char c, *bufs, *bufe; + + from = NEXTFROM; + if (TYPE(from) == T_BIGNUM) { + VALUE big128 = rb_uint2big(128); + while (TYPE(from) == T_BIGNUM) { + from = rb_big_divmod(from, big128); + c = NUM2INT(RARRAY(from)->ptr[1]) | 0x80; /* mod */ + rb_str_buf_cat(buf, &c, sizeof(char)); + from = RARRAY(from)->ptr[0]; /* div */ + } + } + + { + long l = NUM2LONG(from); + if (l < 0) { + rb_raise(rb_eArgError, "can't compress negative numbers"); + } + ul = l; + } + + while (ul) { + c = ((ul & 0x7f) | 0x80); + rb_str_buf_cat(buf, &c, sizeof(char)); + ul >>= 7; + } + + if (RSTRING(buf)->len) { + bufs = RSTRING(buf)->ptr; + bufe = bufs + RSTRING(buf)->len - 1; + *bufs &= 0x7f; /* clear continue bit */ + while (bufs < bufe) { /* reverse */ + c = *bufs; + *bufs++ = *bufe; + *bufe-- = c; + } + rb_str_buf_cat(res, RSTRING(buf)->ptr, RSTRING(buf)->len); + } + else { + c = 0; + rb_str_buf_cat(res, &c, sizeof(char)); + } + } + break; + + default: + break; + } + } + + if (associates) { + rb_str_associate(res, associates); + } + return res; +} + +static char uu_table[] = +"`!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_"; +static char b64_table[] = +"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + +static void +encodes(str, s, len, type) + VALUE str; + char *s; + long len; + int type; +{ + char *buff = ALLOCA_N(char, len * 4 / 3 + 6); + long i = 0; + char *trans = type == 'u' ? 
uu_table : b64_table; + int padding; + + if (type == 'u') { + buff[i++] = len + ' '; + padding = '`'; + } + else { + padding = '='; + } + while (len >= 3) { + buff[i++] = trans[077 & (*s >> 2)]; + buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))]; + buff[i++] = trans[077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03))]; + buff[i++] = trans[077 & s[2]]; + s += 3; + len -= 3; + } + if (len == 2) { + buff[i++] = trans[077 & (*s >> 2)]; + buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))]; + buff[i++] = trans[077 & (((s[1] << 2) & 074) | (('\0' >> 6) & 03))]; + buff[i++] = padding; + } + else if (len == 1) { + buff[i++] = trans[077 & (*s >> 2)]; + buff[i++] = trans[077 & (((*s << 4) & 060) | (('\0' >> 4) & 017))]; + buff[i++] = padding; + buff[i++] = padding; + } + buff[i++] = '\n'; + rb_str_buf_cat(str, buff, i); +} + +static char hex_table[] = "0123456789ABCDEF"; + +static void +qpencode(str, from, len) + VALUE str, from; + long len; +{ + char buff[1024]; + long i = 0, n = 0, prev = EOF; + unsigned char *s = (unsigned char*)RSTRING(from)->ptr; + unsigned char *send = s + RSTRING(from)->len; + + while (s < send) { + if ((*s > 126) || + (*s < 32 && *s != '\n' && *s != '\t') || + (*s == '=')) { + buff[i++] = '='; + buff[i++] = hex_table[*s >> 4]; + buff[i++] = hex_table[*s & 0x0f]; + n += 3; + prev = EOF; + } + else if (*s == '\n') { + if (prev == ' ' || prev == '\t') { + buff[i++] = '='; + buff[i++] = *s; + } + buff[i++] = *s; + n = 0; + prev = *s; + } + else { + buff[i++] = *s; + n++; + prev = *s; + } + if (n > len) { + buff[i++] = '='; + buff[i++] = '\n'; + n = 0; + prev = '\n'; + } + if (i > 1024 - 5) { + rb_str_buf_cat(str, buff, i); + i = 0; + } + s++; + } + if (n > 0) { + buff[i++] = '='; + buff[i++] = '\n'; + } + if (i > 0) { + rb_str_buf_cat(str, buff, i); + } +} + +static inline int +hex2num(c) + char c; +{ + switch (c) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + 
return c - '0'; + case 'a': case 'b': case 'c': + case 'd': case 'e': case 'f': + return c - 'a' + 10; + case 'A': case 'B': case 'C': + case 'D': case 'E': case 'F': + return c - 'A' + 10; + default: + return -1; + } +} + +#define PACK_LENGTH_ADJUST_SIZE(sz) do { \ + tmp = 0; \ + if (len > (send-s)/sz) { \ + if (!star) { \ + tmp = len-(send-s)/sz; \ + } \ + len = (send-s)/sz; \ + } \ +} while (0) + +#ifdef NATINT_PACK +#define PACK_LENGTH_ADJUST(type,sz) do { \ + int t__len = NATINT_LEN(type,(sz)); \ + PACK_LENGTH_ADJUST_SIZE(t__len); \ +} while (0) +#else +#define PACK_LENGTH_ADJUST(type,sz) \ + PACK_LENGTH_ADJUST_SIZE(sizeof(type)) +#endif + +#define PACK_ITEM_ADJUST() while (tmp--) rb_ary_push(ary, Qnil) + +static VALUE +infected_str_new(ptr, len, str) + const char *ptr; + long len; + VALUE str; +{ + VALUE s = rb_str_new(ptr, len); + + OBJ_INFECT(s, str); + return s; +} + +/* + * call-seq: + * str.unpack(format) => anArray + * + * Decodes <i>str</i> (which may contain binary data) according to the + * format string, returning an array of each value extracted. The + * format string consists of a sequence of single-character directives, + * summarized in the table at the end of this entry. + * Each directive may be followed + * by a number, indicating the number of times to repeat with this + * directive. An asterisk (``<code>*</code>'') will use up all + * remaining elements. The directives <code>sSiIlL</code> may each be + * followed by an underscore (``<code>_</code>'') to use the underlying + * platform's native size for the specified type; otherwise, it uses a + * platform-independent consistent size. Spaces are ignored in the + * format string. See also <code>Array#pack</code>. 
+ * + * "abc \0\0abc \0\0".unpack('A6Z6') #=> ["abc", "abc "] + * "abc \0\0".unpack('a3a3') #=> ["abc", " \000\000"] + * "abc \0abc \0".unpack('Z*Z*') #=> ["abc ", "abc "] + * "aa".unpack('b8B8') #=> ["10000110", "01100001"] + * "aaa".unpack('h2H2c') #=> ["16", "61", 97] + * "\xfe\xff\xfe\xff".unpack('sS') #=> [-2, 65534] + * "now=20is".unpack('M*') #=> ["now is"] + * "whole".unpack('xax2aX2aX1aX2a') #=> ["h", "e", "l", "l", "o"] + * + * This table summarizes the various formats and the Ruby classes + * returned by each. + * + * Format | Returns | Function + * -------+---------+----------------------------------------- + * A | String | with trailing nulls and spaces removed + * -------+---------+----------------------------------------- + * a | String | string + * -------+---------+----------------------------------------- + * B | String | extract bits from each character (msb first) + * -------+---------+----------------------------------------- + * b | String | extract bits from each character (lsb first) + * -------+---------+----------------------------------------- + * C | Fixnum | extract a character as an unsigned integer + * -------+---------+----------------------------------------- + * c | Fixnum | extract a character as an integer + * -------+---------+----------------------------------------- + * d,D | Float | treat sizeof(double) characters as + * | | a native double + * -------+---------+----------------------------------------- + * E | Float | treat sizeof(double) characters as + * | | a double in little-endian byte order + * -------+---------+----------------------------------------- + * e | Float | treat sizeof(float) characters as + * | | a float in little-endian byte order + * -------+---------+----------------------------------------- + * f,F | Float | treat sizeof(float) characters as + * | | a native float + * -------+---------+----------------------------------------- + * G | Float | treat sizeof(double) characters as + * | | a double in 
network byte order + * -------+---------+----------------------------------------- + * g | Float | treat sizeof(float) characters as a + * | | float in network byte order + * -------+---------+----------------------------------------- + * H | String | extract hex nibbles from each character + * | | (most significant first) + * -------+---------+----------------------------------------- + * h | String | extract hex nibbles from each character + * | | (least significant first) + * -------+---------+----------------------------------------- + * I | Integer | treat sizeof(int) (modified by _) + * | | successive characters as an unsigned + * | | native integer + * -------+---------+----------------------------------------- + * i | Integer | treat sizeof(int) (modified by _) + * | | successive characters as a signed + * | | native integer + * -------+---------+----------------------------------------- + * L | Integer | treat four (modified by _) successive + * | | characters as an unsigned native + * | | long integer + * -------+---------+----------------------------------------- + * l | Integer | treat four (modified by _) successive + * | | characters as a signed native + * | | long integer + * -------+---------+----------------------------------------- + * M | String | quoted-printable + * -------+---------+----------------------------------------- + * m | String | base64-encoded + * -------+---------+----------------------------------------- + * N | Integer | treat four characters as an unsigned + * | | long in network byte order + * -------+---------+----------------------------------------- + * n | Fixnum | treat two characters as an unsigned + * | | short in network byte order + * -------+---------+----------------------------------------- + * P | String | treat sizeof(char *) characters as a + * | | pointer, and return <i>len</i> characters + * | | from the referenced location + * -------+---------+----------------------------------------- + * p | String | treat 
sizeof(char *) characters as a + * | | pointer to a null-terminated string + * -------+---------+----------------------------------------- + * Q | Integer | treat 8 characters as an unsigned + * | | quad word (64 bits) + * -------+---------+----------------------------------------- + * q | Integer | treat 8 characters as a signed + * | | quad word (64 bits) + * -------+---------+----------------------------------------- + * S | Fixnum | treat two (different if _ used) + * | | successive characters as an unsigned + * | | short in native byte order + * -------+---------+----------------------------------------- + * s | Fixnum | treat two (different if _ used) + * | | successive characters as a signed short + * | | in native byte order + * -------+---------+----------------------------------------- + * U | Integer | UTF-8 characters as unsigned integers + * -------+---------+----------------------------------------- + * u | String | UU-encoded + * -------+---------+----------------------------------------- + * V | Integer | treat four characters as an unsigned + * | | long in little-endian byte order + * -------+---------+----------------------------------------- + * v | Fixnum | treat two characters as an unsigned + * | | short in little-endian byte order + * -------+---------+----------------------------------------- + * w | Integer | BER-compressed integer (see Array#pack) + * -------+---------+----------------------------------------- + * X | --- | skip backward one character + * -------+---------+----------------------------------------- + * x | --- | skip forward one character + * -------+---------+----------------------------------------- + * Z | String | with trailing nulls removed + * | | up to first null with * + * -------+---------+----------------------------------------- + * @ | --- | skip to the offset given by the + * | | length argument + * -------+---------+----------------------------------------- + */ + +static VALUE +pack_unpack(str, fmt) + VALUE 
str, fmt; +{ + static char *hexdigits = "0123456789abcdef0123456789ABCDEFx"; + char *s, *send; + char *p, *pend; + VALUE ary; + char type; + long len; + int tmp, star; +#ifdef NATINT_PACK + int natint; /* native integer */ +#endif + + StringValue(str); + StringValue(fmt); + s = RSTRING(str)->ptr; + send = s + RSTRING(str)->len; + p = RSTRING(fmt)->ptr; + pend = p + RSTRING(fmt)->len; + + ary = rb_ary_new(); + while (p < pend) { + type = *p++; +#ifdef NATINT_PACK + natint = 0; +#endif + + if (ISSPACE(type)) continue; + if (type == '#') { + while ((p < pend) && (*p != '\n')) { + p++; + } + continue; + } + star = 0; + if (*p == '_' || *p == '!') { + char *natstr = "sSiIlL"; + + if (strchr(natstr, type)) { +#ifdef NATINT_PACK + natint = 1; +#endif + p++; + } + else { + rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr); + } + } + if (p >= pend) + len = 1; + else if (*p == '*') { + star = 1; + len = send - s; + p++; + } + else if (ISDIGIT(*p)) { + len = strtoul(p, (char**)&p, 10); + } + else { + len = (type != '@'); + } + + switch (type) { + case '%': + rb_raise(rb_eArgError, "%% is not supported"); + break; + + case 'A': + if (len > send - s) len = send - s; + { + long end = len; + char *t = s + len - 1; + + while (t >= s) { + if (*t != ' ' && *t != '\0') break; + t--; len--; + } + rb_ary_push(ary, infected_str_new(s, len, str)); + s += end; + } + break; + + case 'Z': + { + char *t = s; + + if (len > send-s) len = send-s; + while (t < s+len && *t) t++; + rb_ary_push(ary, infected_str_new(s, t-s, str)); + if (t < send) t++; + s = star ? 
t : s+len; + } + break; + + case 'a': + if (len > send - s) len = send - s; + rb_ary_push(ary, infected_str_new(s, len, str)); + s += len; + break; + + + case 'b': + { + VALUE bitstr; + char *t; + int bits; + long i; + + if (p[-1] == '*' || len > (send - s) * 8) + len = (send - s) * 8; + bits = 0; + rb_ary_push(ary, bitstr = rb_str_new(0, len)); + t = RSTRING(bitstr)->ptr; + for (i=0; i<len; i++) { + if (i & 7) bits >>= 1; + else bits = *s++; + *t++ = (bits & 1) ? '1' : '0'; + } + } + break; + + case 'B': + { + VALUE bitstr; + char *t; + int bits; + long i; + + if (p[-1] == '*' || len > (send - s) * 8) + len = (send - s) * 8; + bits = 0; + rb_ary_push(ary, bitstr = rb_str_new(0, len)); + t = RSTRING(bitstr)->ptr; + for (i=0; i<len; i++) { + if (i & 7) bits <<= 1; + else bits = *s++; + *t++ = (bits & 128) ? '1' : '0'; + } + } + break; + + case 'h': + { + VALUE bitstr; + char *t; + int bits; + long i; + + if (p[-1] == '*' || len > (send - s) * 2) + len = (send - s) * 2; + bits = 0; + rb_ary_push(ary, bitstr = rb_str_new(0, len)); + t = RSTRING(bitstr)->ptr; + for (i=0; i<len; i++) { + if (i & 1) + bits >>= 4; + else + bits = *s++; + *t++ = hexdigits[bits & 15]; + } + } + break; + + case 'H': + { + VALUE bitstr; + char *t; + int bits; + long i; + + if (p[-1] == '*' || len > (send - s) * 2) + len = (send - s) * 2; + bits = 0; + rb_ary_push(ary, bitstr = rb_str_new(0, len)); + t = RSTRING(bitstr)->ptr; + for (i=0; i<len; i++) { + if (i & 1) + bits <<= 4; + else + bits = *s++; + *t++ = hexdigits[(bits >> 4) & 15]; + } + } + break; + + case 'c': + PACK_LENGTH_ADJUST(char,sizeof(char)); + while (len-- > 0) { + int c = *s++; + if (c > (char)127) c-=256; + rb_ary_push(ary, INT2FIX(c)); + } + PACK_ITEM_ADJUST(); + break; + + case 'C': + PACK_LENGTH_ADJUST(unsigned char,sizeof(unsigned char)); + while (len-- > 0) { + unsigned char c = *s++; + rb_ary_push(ary, INT2FIX(c)); + } + PACK_ITEM_ADJUST(); + break; + + case 's': + PACK_LENGTH_ADJUST(short,2); + while (len-- > 0) { + 
short tmp = 0; + memcpy(OFF16(&tmp), s, NATINT_LEN(short,2)); + EXTEND16(tmp); + s += NATINT_LEN(short,2); + rb_ary_push(ary, INT2FIX(tmp)); + } + PACK_ITEM_ADJUST(); + break; + + case 'S': + PACK_LENGTH_ADJUST(unsigned short,2); + while (len-- > 0) { + unsigned short tmp = 0; + memcpy(OFF16(&tmp), s, NATINT_LEN(unsigned short,2)); + s += NATINT_LEN(unsigned short,2); + rb_ary_push(ary, INT2FIX(tmp)); + } + PACK_ITEM_ADJUST(); + break; + + case 'i': + PACK_LENGTH_ADJUST(int,sizeof(int)); + while (len-- > 0) { + int tmp; + memcpy(&tmp, s, sizeof(int)); + s += sizeof(int); + rb_ary_push(ary, INT2NUM(tmp)); + } + PACK_ITEM_ADJUST(); + break; + + case 'I': + PACK_LENGTH_ADJUST(unsigned int,sizeof(unsigned int)); + while (len-- > 0) { + unsigned int tmp; + memcpy(&tmp, s, sizeof(unsigned int)); + s += sizeof(unsigned int); + rb_ary_push(ary, UINT2NUM(tmp)); + } + PACK_ITEM_ADJUST(); + break; + + case 'l': + PACK_LENGTH_ADJUST(long,4); + while (len-- > 0) { + long tmp = 0; + memcpy(OFF32(&tmp), s, NATINT_LEN(long,4)); + EXTEND32(tmp); + s += NATINT_LEN(long,4); + rb_ary_push(ary, LONG2NUM(tmp)); + } + PACK_ITEM_ADJUST(); + break; + case 'L': + PACK_LENGTH_ADJUST(unsigned long,4); + while (len-- > 0) { + unsigned long tmp = 0; + memcpy(OFF32(&tmp), s, NATINT_LEN(unsigned long,4)); + s += NATINT_LEN(unsigned long,4); + rb_ary_push(ary, ULONG2NUM(tmp)); + } + PACK_ITEM_ADJUST(); + break; + + case 'q': + PACK_LENGTH_ADJUST_SIZE(QUAD_SIZE); + while (len-- > 0) { + char *tmp = (char*)s; + s += QUAD_SIZE; + rb_ary_push(ary, rb_quad_unpack(tmp, 1)); + } + PACK_ITEM_ADJUST(); + break; + case 'Q': + PACK_LENGTH_ADJUST_SIZE(QUAD_SIZE); + while (len-- > 0) { + char *tmp = (char*)s; + s += QUAD_SIZE; + rb_ary_push(ary, rb_quad_unpack(tmp, 0)); + } + break; + + case 'n': + PACK_LENGTH_ADJUST(unsigned short,2); + while (len-- > 0) { + unsigned short tmp = 0; + memcpy(OFF16B(&tmp), s, NATINT_LEN(unsigned short,2)); + s += NATINT_LEN(unsigned short,2); + rb_ary_push(ary, 
UINT2NUM(ntohs(tmp))); + } + PACK_ITEM_ADJUST(); + break; + + case 'N': + PACK_LENGTH_ADJUST(unsigned long,4); + while (len-- > 0) { + unsigned long tmp = 0; + memcpy(OFF32B(&tmp), s, NATINT_LEN(unsigned long,4)); + s += NATINT_LEN(unsigned long,4); + rb_ary_push(ary, ULONG2NUM(ntohl(tmp))); + } + PACK_ITEM_ADJUST(); + break; + + case 'v': + PACK_LENGTH_ADJUST(unsigned short,2); + while (len-- > 0) { + unsigned short tmp = 0; + memcpy(OFF16(&tmp), s, NATINT_LEN(unsigned short,2)); + s += NATINT_LEN(unsigned short,2); + rb_ary_push(ary, UINT2NUM(vtohs(tmp))); + } + PACK_ITEM_ADJUST(); + break; + + case 'V': + PACK_LENGTH_ADJUST(unsigned long,4); + while (len-- > 0) { + unsigned long tmp = 0; + memcpy(OFF32(&tmp), s, NATINT_LEN(long,4)); + s += NATINT_LEN(long,4); + rb_ary_push(ary, ULONG2NUM(vtohl(tmp))); + } + PACK_ITEM_ADJUST(); + break; + + case 'f': + case 'F': + PACK_LENGTH_ADJUST(float,sizeof(float)); + while (len-- > 0) { + float tmp; + memcpy(&tmp, s, sizeof(float)); + s += sizeof(float); + rb_ary_push(ary, rb_float_new((double)tmp)); + } + PACK_ITEM_ADJUST(); + break; + + case 'e': + PACK_LENGTH_ADJUST(float,sizeof(float)); + while (len-- > 0) { + float tmp; + FLOAT_CONVWITH(ftmp); + + memcpy(&tmp, s, sizeof(float)); + s += sizeof(float); + tmp = VTOHF(tmp,ftmp); + rb_ary_push(ary, rb_float_new((double)tmp)); + } + PACK_ITEM_ADJUST(); + break; + + case 'E': + PACK_LENGTH_ADJUST(double,sizeof(double)); + while (len-- > 0) { + double tmp; + DOUBLE_CONVWITH(dtmp); + + memcpy(&tmp, s, sizeof(double)); + s += sizeof(double); + tmp = VTOHD(tmp,dtmp); + rb_ary_push(ary, rb_float_new(tmp)); + } + PACK_ITEM_ADJUST(); + break; + + case 'D': + case 'd': + PACK_LENGTH_ADJUST(double,sizeof(double)); + while (len-- > 0) { + double tmp; + memcpy(&tmp, s, sizeof(double)); + s += sizeof(double); + rb_ary_push(ary, rb_float_new(tmp)); + } + PACK_ITEM_ADJUST(); + break; + + case 'g': + PACK_LENGTH_ADJUST(float,sizeof(float)); + while (len-- > 0) { + float tmp; + 
FLOAT_CONVWITH(ftmp;) + + memcpy(&tmp, s, sizeof(float)); + s += sizeof(float); + tmp = NTOHF(tmp,ftmp); + rb_ary_push(ary, rb_float_new((double)tmp)); + } + PACK_ITEM_ADJUST(); + break; + + case 'G': + PACK_LENGTH_ADJUST(double,sizeof(double)); + while (len-- > 0) { + double tmp; + DOUBLE_CONVWITH(dtmp); + + memcpy(&tmp, s, sizeof(double)); + s += sizeof(double); + tmp = NTOHD(tmp,dtmp); + rb_ary_push(ary, rb_float_new(tmp)); + } + PACK_ITEM_ADJUST(); + break; + + case 'U': + if (len > send - s) len = send - s; + while (len > 0 && s < send) { + long alen = send - s; + unsigned long l; + + l = utf8_to_uv(s, &alen); + s += alen; len--; + rb_ary_push(ary, ULONG2NUM(l)); + } + break; + + case 'u': + { + VALUE buf = infected_str_new(0, (send - s)*3/4, str); + char *ptr = RSTRING(buf)->ptr; + long total = 0; + + while (s < send && *s > ' ' && *s < 'a') { + long a,b,c,d; + char hunk[4]; + + hunk[3] = '\0'; + len = (*s++ - ' ') & 077; + total += len; + if (total > RSTRING(buf)->len) { + len -= total - RSTRING(buf)->len; + total = RSTRING(buf)->len; + } + + while (len > 0) { + long mlen = len > 3 ? 
3 : len; + + if (s < send && *s >= ' ') + a = (*s++ - ' ') & 077; + else + a = 0; + if (s < send && *s >= ' ') + b = (*s++ - ' ') & 077; + else + b = 0; + if (s < send && *s >= ' ') + c = (*s++ - ' ') & 077; + else + c = 0; + if (s < send && *s >= ' ') + d = (*s++ - ' ') & 077; + else + d = 0; + hunk[0] = a << 2 | b >> 4; + hunk[1] = b << 4 | c >> 2; + hunk[2] = c << 6 | d; + memcpy(ptr, hunk, mlen); + ptr += mlen; + len -= mlen; + } + if (*s == '\r') s++; + if (*s == '\n') s++; + else if (s < send && (s+1 == send || s[1] == '\n')) + s += 2; /* possible checksum byte */ + } + + RSTRING(buf)->ptr[total] = '\0'; + RSTRING(buf)->len = total; + rb_ary_push(ary, buf); + } + break; + + case 'm': + { + VALUE buf = infected_str_new(0, (send - s)*3/4, str); + char *ptr = RSTRING(buf)->ptr; + int a = -1,b = -1,c = 0,d; + static int first = 1; + static int b64_xtable[256]; + + if (first) { + int i; + first = 0; + + for (i = 0; i < 256; i++) { + b64_xtable[i] = -1; + } + for (i = 0; i < 64; i++) { + b64_xtable[(int)b64_table[i]] = i; + } + } + while (s < send) { + while (s[0] == '\r' || s[0] == '\n') { s++; } + if ((a = b64_xtable[(int)s[0]]) == -1) break; + if ((b = b64_xtable[(int)s[1]]) == -1) break; + if ((c = b64_xtable[(int)s[2]]) == -1) break; + if ((d = b64_xtable[(int)s[3]]) == -1) break; + *ptr++ = a << 2 | b >> 4; + *ptr++ = b << 4 | c >> 2; + *ptr++ = c << 6 | d; + s += 4; + } + if (a != -1 && b != -1) { + if (s + 2 < send && s[2] == '=') + *ptr++ = a << 2 | b >> 4; + if (c != -1 && s + 3 < send && s[3] == '=') { + *ptr++ = a << 2 | b >> 4; + *ptr++ = b << 4 | c >> 2; + } + } + *ptr = '\0'; + RSTRING(buf)->len = ptr - RSTRING(buf)->ptr; + rb_ary_push(ary, buf); + } + break; + + case 'M': + { + VALUE buf = infected_str_new(0, send - s, str); + char *ptr = RSTRING(buf)->ptr; + int c1, c2; + + while (s < send) { + if (*s == '=') { + if (++s == send) break; + if (*s != '\n') { + if ((c1 = hex2num(*s)) == -1) break; + if (++s == send) break; + if ((c2 = hex2num(*s)) == 
-1) break; + *ptr++ = c1 << 4 | c2; + } + } + else { + *ptr++ = *s; + } + s++; + } + *ptr = '\0'; + RSTRING(buf)->len = ptr - RSTRING(buf)->ptr; + rb_ary_push(ary, buf); + } + break; + + case '@': + if (len > RSTRING(str)->len) + rb_raise(rb_eArgError, "@ outside of string"); + s = RSTRING(str)->ptr + len; + break; + + case 'X': + if (len > s - RSTRING(str)->ptr) + rb_raise(rb_eArgError, "X outside of string"); + s -= len; + break; + + case 'x': + if (len > send - s) + rb_raise(rb_eArgError, "x outside of string"); + s += len; + break; + + case 'P': + if (sizeof(char *) <= send - s) { + char *t; + VALUE tmp; + + memcpy(&t, s, sizeof(char *)); + s += sizeof(char *); + + if (t) { + VALUE a, *p, *pend; + + if (!(a = rb_str_associated(str))) { + rb_raise(rb_eArgError, "no associated pointer"); + } + p = RARRAY(a)->ptr; + pend = p + RARRAY(a)->len; + while (p < pend) { + if (TYPE(*p) == T_STRING && RSTRING(*p)->ptr == t) { + if (len > RSTRING(*p)->len) { + len = RSTRING(*p)->len; + } + break; + } + p++; + } + if (p == pend) { + rb_raise(rb_eArgError, "non associated pointer"); + } + tmp = rb_tainted_str_new(t, len); + } + else { + tmp = Qnil; + } + rb_ary_push(ary, tmp); + } + break; + + case 'p': + if (len > (send - s) / sizeof(char *)) + len = (send - s) / sizeof(char *); + while (len-- > 0) { + if (send - s < sizeof(char *)) + break; + else { + VALUE tmp; + char *t; + + memcpy(&t, s, sizeof(char *)); + s += sizeof(char *); + + if (t) { + VALUE a, *p, *pend; + + if (!(a = rb_str_associated(str))) { + rb_raise(rb_eArgError, "no associated pointer"); + } + p = RARRAY(a)->ptr; + pend = p + RARRAY(a)->len; + while (p < pend) { + if (TYPE(*p) == T_STRING && RSTRING(*p)->ptr == t) { + break; + } + p++; + } + if (p == pend) { + rb_raise(rb_eArgError, "non associated pointer"); + } + tmp = rb_str_new2(t); + OBJ_INFECT(tmp, str); + } + else { + tmp = Qnil; + } + rb_ary_push(ary, tmp); + } + } + break; + + case 'w': + { + unsigned long ul = 0; + unsigned long ulmask = 0xfeL << 
((sizeof(unsigned long) - 1) * 8); + + while (len > 0 && s < send) { + ul <<= 7; + ul |= (*s & 0x7f); + if (!(*s++ & 0x80)) { + rb_ary_push(ary, ULONG2NUM(ul)); + len--; + ul = 0; + } + else if (ul & ulmask) { + VALUE big = rb_uint2big(ul); + VALUE big128 = rb_uint2big(128); + while (s < send) { + big = rb_big_mul(big, big128); + big = rb_big_plus(big, rb_uint2big(*s & 0x7f)); + if (!(*s++ & 0x80)) { + rb_ary_push(ary, big); + len--; + ul = 0; + break; + } + } + } + } + } + break; + + default: + break; + } + } + + return ary; +} + +#define BYTEWIDTH 8 + +static int +uv_to_utf8(buf, uv) + char *buf; + unsigned long uv; +{ + if (uv <= 0x7f) { + buf[0] = (char)uv; + return 1; + } + if (uv <= 0x7ff) { + buf[0] = ((uv>>6)&0xff)|0xc0; + buf[1] = (uv&0x3f)|0x80; + return 2; + } + if (uv <= 0xffff) { + buf[0] = ((uv>>12)&0xff)|0xe0; + buf[1] = ((uv>>6)&0x3f)|0x80; + buf[2] = (uv&0x3f)|0x80; + return 3; + } + if (uv <= 0x1fffff) { + buf[0] = ((uv>>18)&0xff)|0xf0; + buf[1] = ((uv>>12)&0x3f)|0x80; + buf[2] = ((uv>>6)&0x3f)|0x80; + buf[3] = (uv&0x3f)|0x80; + return 4; + } + if (uv <= 0x3ffffff) { + buf[0] = ((uv>>24)&0xff)|0xf8; + buf[1] = ((uv>>18)&0x3f)|0x80; + buf[2] = ((uv>>12)&0x3f)|0x80; + buf[3] = ((uv>>6)&0x3f)|0x80; + buf[4] = (uv&0x3f)|0x80; + return 5; + } + if (uv <= 0x7fffffff) { + buf[0] = ((uv>>30)&0xff)|0xfc; + buf[1] = ((uv>>24)&0x3f)|0x80; + buf[2] = ((uv>>18)&0x3f)|0x80; + buf[3] = ((uv>>12)&0x3f)|0x80; + buf[4] = ((uv>>6)&0x3f)|0x80; + buf[5] = (uv&0x3f)|0x80; + return 6; + } + rb_raise(rb_eRangeError, "pack(U): value out of range"); +} + +static const long utf8_limits[] = { + 0x0, /* 1 */ + 0x80, /* 2 */ + 0x800, /* 3 */ + 0x10000, /* 4 */ + 0x200000, /* 5 */ + 0x4000000, /* 6 */ + 0x80000000, /* 7 */ +}; + +static unsigned long +utf8_to_uv(p, lenp) + char *p; + long *lenp; +{ + int c = *p++ & 0xff; + unsigned long uv = c; + long n; + + if (!(uv & 0x80)) { + *lenp = 1; + return uv; + } + if (!(uv & 0x40)) { + *lenp = 1; + rb_raise(rb_eArgError, "malformed 
UTF-8 character"); + } + + if (!(uv & 0x20)) { n = 2; uv &= 0x1f; } + else if (!(uv & 0x10)) { n = 3; uv &= 0x0f; } + else if (!(uv & 0x08)) { n = 4; uv &= 0x07; } + else if (!(uv & 0x04)) { n = 5; uv &= 0x03; } + else if (!(uv & 0x02)) { n = 6; uv &= 0x01; } + else { + *lenp = 1; + rb_raise(rb_eArgError, "malformed UTF-8 character"); + } + if (n > *lenp) { + rb_raise(rb_eArgError, "malformed UTF-8 character (expected %d bytes, given %d bytes)", + n, *lenp); + } + *lenp = n--; + if (n != 0) { + while (n--) { + c = *p++ & 0xff; + if ((c & 0xc0) != 0x80) { + *lenp -= n + 1; + rb_raise(rb_eArgError, "malformed UTF-8 character"); + } + else { + c &= 0x3f; + uv = uv << 6 | c; + } + } + } + n = *lenp - 1; + if (uv < utf8_limits[n]) { + rb_raise(rb_eArgError, "redundant UTF-8 sequence"); + } + return uv; +} + +void +Init_pack() +{ + rb_define_method(rb_cArray, "pack", pack_pack, 1); + rb_define_method(rb_cString, "unpack", pack_unpack, 1); +} +/********************************************************************** + + prec.c - + + $Author: nobu $ + $Date: 2004/04/14 04:06:25 $ + created at: Tue Jan 26 02:40:41 2000 + + Copyright (C) 1993-2003 Yukihiro Matsumoto + +**********************************************************************/ + +#include "ruby.h" + +VALUE rb_mPrecision; + +static ID prc_pr, prc_if; + + +/* + * call-seq: + * num.prec(klass) => a_klass + * + * Converts _self_ into an instance of _klass_. By default, + * +prec+ invokes + * + * klass.induced_from(num) + * + * and returns its value. So, if <code>klass.induced_from</code> + * doesn't return an instance of _klass_, it will be necessary + * to reimplement +prec+. + */ + +static VALUE +prec_prec(x, klass) + VALUE x, klass; +{ + return rb_funcall(klass, prc_if, 1, x); +} + +/* + * call-seq: + * num.prec_i => Integer + * + * Returns an +Integer+ converted from _num_. It is equivalent + * to <code>prec(Integer)</code>. 
+ */ + +static VALUE +prec_prec_i(x) + VALUE x; +{ + VALUE klass = rb_cInteger; + + return rb_funcall(x, prc_pr, 1, klass); +} + +/* + * call-seq: + * num.prec_f => Integer + * + * Returns an +Float+ converted from _num_. It is equivalent + * to <code>prec(Float)</code>. + */ + +static VALUE +prec_prec_f(x) + VALUE x; +{ + VALUE klass = rb_cFloat; + + return rb_funcall(x, prc_pr, 1, klass); +} + +/* + * call-seq: + * Mod.induced_from(number) => a_mod + * + * Creates an instance of mod from. This method is overridden + * by concrete +Numeric+ classes, so that (for example) + * + * Fixnum.induced_from(9.9) #=> 9 + * + * Note that a use of +prec+ in a redefinition may cause + * an infinite loop. + */ + +static VALUE +prec_induced_from(module, x) + VALUE module, x; +{ + rb_raise(rb_eTypeError, "undefined conversion from %s into %s", + rb_obj_classname(x), rb_class2name(module)); + return Qnil; /* not reached */ +} + +/* + * call_seq: + * included + * + * When the +Precision+ module is mixed-in to a class, this +included+ + * method is used to add our default +induced_from+ implementation + * to the host class. + */ + +static VALUE +prec_included(module, include) + VALUE module, include; +{ + switch (TYPE(include)) { + case T_CLASS: + case T_MODULE: + break; + default: + Check_Type(include, T_CLASS); + break; + } + rb_define_singleton_method(include, "induced_from", prec_induced_from, 1); + return module; +} + +/* + * Precision is a mixin for concrete numeric classes with + * precision. Here, `precision' means the fineness of approximation + * of a real number, so, this module should not be included into + * anything which is not a subset of Real (so it should not be + * included in classes such as +Complex+ or +Matrix+). 
+*/ + +void +Init_Precision() +{ + rb_mPrecision = rb_define_module("Precision"); + rb_define_singleton_method(rb_mPrecision, "included", prec_included, 1); + rb_define_method(rb_mPrecision, "prec", prec_prec, 1); + rb_define_method(rb_mPrecision, "prec_i", prec_prec_i, 0); + rb_define_method(rb_mPrecision, "prec_f", prec_prec_f, 0); + + prc_pr = rb_intern("prec"); + prc_if = rb_intern("induced_from"); +} +/********************************************************************** + + process.c - + + $Author: matz $ + $Date: 2005/03/04 06:47:41 $ + created at: Tue Aug 10 14:30:50 JST 1993 + + Copyright (C) 1993-2003 Yukihiro Matsumoto + Copyright (C) 2000 Network Applied Communication Laboratory, Inc. + Copyright (C) 2000 Information-technology Promotion Agency, Japan + +**********************************************************************/ + +#include "ruby.h" +#include "rubysig.h" +#include <stdio.h> +#include <errno.h> +#include <signal.h> +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#ifdef HAVE_FCNTL_H +#include <fcntl.h> +#endif +#ifdef __DJGPP__ +#include <process.h> +#endif + +#include <time.h> +#include <ctype.h> + +#ifndef EXIT_SUCCESS +#define EXIT_SUCCESS 0 +#endif +#ifndef EXIT_FAILURE +#define EXIT_FAILURE 1 +#endif + +struct timeval rb_time_interval _((VALUE)); + +#ifdef HAVE_SYS_WAIT_H +# include <sys/wait.h> +#endif +#ifdef HAVE_SYS_RESOURCE_H +# include <sys/resource.h> +#endif +#include "st.h" + +#ifdef __EMX__ +#undef HAVE_GETPGRP +#endif + +#ifdef HAVE_SYS_TIMES_H +#include <sys/times.h> +#endif + +#ifdef HAVE_GRP_H +#include <grp.h> +#endif + +#if defined(HAVE_TIMES) || defined(_WIN32) +static VALUE S_Tms; +#endif + +#ifndef WIFEXITED +#define WIFEXITED(w) (((w) & 0xff) == 0) +#endif +#ifndef WIFSIGNALED +#define WIFSIGNALED(w) (((w) & 0x7f) > 0 && (((w) & 0x7f) < 0x7f)) +#endif +#ifndef WIFSTOPPED +#define WIFSTOPPED(w) (((w) & 0xff) == 0x7f) +#endif +#ifndef WEXITSTATUS +#define 
WEXITSTATUS(w) (((w) >> 8) & 0xff) +#endif +#ifndef WTERMSIG +#define WTERMSIG(w) ((w) & 0x7f) +#endif +#ifndef WSTOPSIG +#define WSTOPSIG WEXITSTATUS +#endif + +#if defined(__APPLE__) && ( defined(__MACH__) || defined(__DARWIN__) ) && !defined(__MacOS_X__) +#define __MacOS_X__ 1 +#endif + +#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__) +#define HAVE_44BSD_SETUID 1 +#define HAVE_44BSD_SETGID 1 +#endif + +#ifdef __NetBSD__ +#undef HAVE_SETRUID +#undef HAVE_SETRGID +#endif + +#if defined(__MacOS_X__) || defined(__bsdi__) +#define BROKEN_SETREUID 1 +#define BROKEN_SETREGID 1 +#endif + +#if defined(HAVE_44BSD_SETUID) || defined(__MacOS_X__) +#if !defined(USE_SETREUID) && !defined(BROKEN_SETREUID) +#define OBSOLETE_SETREUID 1 +#endif +#if !defined(USE_SETREGID) && !defined(BROKEN_SETREGID) +#define OBSOLETE_SETREGID 1 +#endif +#endif + +#define preserving_errno(stmts) \ + do {int saved_errno = errno; stmts; errno = saved_errno;} while (0) + + +/* + * call-seq: + * Process.pid => fixnum + * + * Returns the process id of this process. Not available on all + * platforms. + * + * Process.pid #=> 27415 + */ + +static VALUE +get_pid() +{ + rb_secure(2); + return INT2FIX(getpid()); +} + + +/* + * call-seq: + * Process.ppid => fixnum + * + * Returns the process id of the parent of this process. Always + * returns 0 on NT. Not available on all platforms. + * + * puts "I am #{Process.pid}" + * Process.fork { puts "Dad is #{Process.ppid}" } + * + * <em>produces:</em> + * + * I am 27417 + * Dad is 27417 + */ + +static VALUE +get_ppid() +{ + rb_secure(2); +#ifdef _WIN32 + return INT2FIX(0); +#else + return INT2FIX(getppid()); +#endif +} + + +/********************************************************************* + * + * Document-class: Process::Status + * + * <code>Process::Status</code> encapsulates the information on the + * status of a running or terminated system process. 
The built-in + * variable <code>$?</code> is either +nil+ or a + * <code>Process::Status</code> object. + * + * fork { exit 99 } #=> 26557 + * Process.wait #=> 26557 + * $?.class #=> Process::Status + * $?.to_i #=> 25344 + * $? >> 8 #=> 99 + * $?.stopped? #=> false + * $?.exited? #=> true + * $?.exitstatus #=> 99 + * + * Posix systems record information on processes using a 16-bit + * integer. The lower bits record the process status (stopped, + * exited, signaled) and the upper bits possibly contain additional + * information (for example the program's return code in the case of + * exited processes). Pre Ruby 1.8, these bits were exposed directly + * to the Ruby program. Ruby now encapsulates these in a + * <code>Process::Status</code> object. To maximize compatibility, + * however, these objects retain a bit-oriented interface. In the + * descriptions that follow, when we talk about the integer value of + * _stat_, we're referring to this 16 bit value. + */ + +static VALUE rb_cProcStatus; +VALUE rb_last_status = Qnil; + +static void +last_status_set(status, pid) + int status, pid; +{ + rb_last_status = rb_obj_alloc(rb_cProcStatus); + rb_iv_set(rb_last_status, "status", INT2FIX(status)); + rb_iv_set(rb_last_status, "pid", INT2FIX(pid)); +} + + +/* + * call-seq: + * stat.to_i => fixnum + * stat.to_int => fixnum + * + * Returns the bits in _stat_ as a <code>Fixnum</code>. Poking + * around in these bits is platform dependent. + * + * fork { exit 0xab } #=> 26566 + * Process.wait #=> 26566 + * sprintf('%04x', $?.to_i) #=> "ab00" + */ + +static VALUE +pst_to_i(st) + VALUE st; +{ + return rb_iv_get(st, "status"); +} + + +/* + * call-seq: + * stat.to_s => string + * + * Equivalent to _stat_<code>.to_i.to_s</code>. + */ + +static VALUE +pst_to_s(st) + VALUE st; +{ + return rb_fix2str(pst_to_i(st), 10); +} + + +/* + * call-seq: + * stat.pid => fixnum + * + * Returns the process ID that this status object represents. 
+ * + * fork { exit } #=> 26569 + * Process.wait #=> 26569 + * $?.pid #=> 26569 + */ + +static VALUE +pst_pid(st) + VALUE st; +{ + return rb_iv_get(st, "pid"); +} + + +/* + * call-seq: + * stat.inspect => string + * + * Override the inspection method. + */ + +static VALUE +pst_inspect(st) + VALUE st; +{ + VALUE pid; + int status; + VALUE str; + char buf[256]; + + pid = pst_pid(st); + status = NUM2INT(st); + + snprintf(buf, sizeof(buf), "#<%s: pid=%ld", rb_class2name(CLASS_OF(st)), NUM2LONG(pid)); + str = rb_str_new2(buf); + if (WIFSTOPPED(status)) { + int stopsig = WSTOPSIG(status); + const char *signame = ruby_signal_name(stopsig); + if (signame) { + snprintf(buf, sizeof(buf), ",stopped(SIG%s=%d)", signame, stopsig); + } + else { + snprintf(buf, sizeof(buf), ",stopped(%d)", stopsig); + } + rb_str_cat2(str, buf); + } + if (WIFSIGNALED(status)) { + int termsig = WTERMSIG(status); + const char *signame = ruby_signal_name(termsig); + if (signame) { + snprintf(buf, sizeof(buf), ",signaled(SIG%s=%d)", signame, termsig); + } + else { + snprintf(buf, sizeof(buf), ",signaled(%d)", termsig); + } + rb_str_cat2(str, buf); + } + if (WIFEXITED(status)) { + snprintf(buf, sizeof(buf), ",exited(%d)", WEXITSTATUS(status)); + rb_str_cat2(str, buf); + } +#ifdef WCOREDUMP + if (WCOREDUMP(status)) { + rb_str_cat2(str, ",coredumped"); + } +#endif + rb_str_cat2(str, ">"); + return str; +} + + +/* + * call-seq: + * stat == other => true or false + * + * Returns +true+ if the integer value of _stat_ + * equals <em>other</em>. + */ + +static VALUE +pst_equal(st1, st2) + VALUE st1, st2; +{ + if (st1 == st2) return Qtrue; + return rb_equal(pst_to_i(st1), st2); +} + + +/* + * call-seq: + * stat & num => fixnum + * + * Logical AND of the bits in _stat_ with <em>num</em>. + * + * fork { exit 0x37 } + * Process.wait + * sprintf('%04x', $?.to_i) #=> "3700" + * sprintf('%04x', $? 
& 0x1e00) #=> "1600" + */ + +static VALUE +pst_bitand(st1, st2) + VALUE st1, st2; +{ + int status = NUM2INT(st1) & NUM2INT(st2); + + return INT2NUM(status); +} + + +/* + * call-seq: + * stat >> num => fixnum + * + * Shift the bits in _stat_ right <em>num</em> places. + * + * fork { exit 99 } #=> 26563 + * Process.wait #=> 26563 + * $?.to_i #=> 25344 + * $? >> 8 #=> 99 + */ + +static VALUE +pst_rshift(st1, st2) + VALUE st1, st2; +{ + int status = NUM2INT(st1) >> NUM2INT(st2); + + return INT2NUM(status); +} + + +/* + * call-seq: + * stat.stopped? => true or false + * + * Returns +true+ if this process is stopped. This is only + * returned if the corresponding <code>wait</code> call had the + * <code>WUNTRACED</code> flag set. + */ + +static VALUE +pst_wifstopped(st) + VALUE st; +{ + int status = NUM2INT(st); + + if (WIFSTOPPED(status)) + return Qtrue; + else + return Qfalse; +} + + +/* + * call-seq: + * stat.stopsig => fixnum or nil + * + * Returns the number of the signal that caused _stat_ to stop + * (or +nil+ if self is not stopped). + */ + +static VALUE +pst_wstopsig(st) + VALUE st; +{ + int status = NUM2INT(st); + + if (WIFSTOPPED(status)) + return INT2NUM(WSTOPSIG(status)); + return Qnil; +} + + +/* + * call-seq: + * stat.signaled? => true or false + * + * Returns +true+ if _stat_ terminated because of + * an uncaught signal. + */ + +static VALUE +pst_wifsignaled(st) + VALUE st; +{ + int status = NUM2INT(st); + + if (WIFSIGNALED(status)) + return Qtrue; + else + return Qfalse; +} + + +/* + * call-seq: + * stat.termsig => fixnum or nil + * + * Returns the number of the signal that caused _stat_ to + * terminate (or +nil+ if self was not terminated by an + * uncaught signal). + */ + +static VALUE +pst_wtermsig(st) + VALUE st; +{ + int status = NUM2INT(st); + + if (WIFSIGNALED(status)) + return INT2NUM(WTERMSIG(status)); + return Qnil; +} + + +/* + * call-seq: + * stat.exited? 
=> true or false + * + * Returns +true+ if _stat_ exited normally (for + * example using an <code>exit()</code> call or finishing the + * program). + */ + +static VALUE +pst_wifexited(st) + VALUE st; +{ + int status = NUM2INT(st); + + if (WIFEXITED(status)) + return Qtrue; + else + return Qfalse; +} + + +/* + * call-seq: + * stat.exitstatus => fixnum or nil + * + * Returns the least significant eight bits of the return code of + * _stat_. Only available if <code>exited?</code> is + * +true+. + * + * fork { } #=> 26572 + * Process.wait #=> 26572 + * $?.exited? #=> true + * $?.exitstatus #=> 0 + * + * fork { exit 99 } #=> 26573 + * Process.wait #=> 26573 + * $?.exited? #=> true + * $?.exitstatus #=> 99 + */ + +static VALUE +pst_wexitstatus(st) + VALUE st; +{ + int status = NUM2INT(st); + + if (WIFEXITED(status)) + return INT2NUM(WEXITSTATUS(status)); + return Qnil; +} + + +/* + * call-seq: + * stat.success? => true, false or nil + * + * Returns +true+ if _stat_ is successful, +false+ if not. + * Returns +nil+ if <code>exited?</code> is not +true+. + */ + +static VALUE +pst_success_p(st) + VALUE st; +{ + int status = NUM2INT(st); + + if (!WIFEXITED(status)) + return Qnil; + return WEXITSTATUS(status) == EXIT_SUCCESS ? Qtrue : Qfalse; +} + + +/* + * call-seq: + * stat.coredump? => true or false + * + * Returns +true+ if _stat_ generated a coredump + * when it terminated. Not available on all platforms. 
+ */ + +static VALUE +pst_wcoredump(st) + VALUE st; +{ +#ifdef WCOREDUMP + int status = NUM2INT(st); + + if (WCOREDUMP(status)) + return Qtrue; + else + return Qfalse; +#else + return Qfalse; +#endif +} + +#if !defined(HAVE_WAITPID) && !defined(HAVE_WAIT4) +#define NO_WAITPID +static st_table *pid_tbl; +#endif + +int +rb_waitpid(pid, st, flags) + int pid; + int *st; + int flags; +{ + int result; +#ifndef NO_WAITPID + int oflags = flags; + if (!rb_thread_alone()) { /* there're other threads to run */ + flags |= WNOHANG; + } + + retry: + TRAP_BEG; +#ifdef HAVE_WAITPID + result = waitpid(pid, st, flags); +#else /* HAVE_WAIT4 */ + result = wait4(pid, st, flags, NULL); +#endif + TRAP_END; + if (result < 0) { + if (errno == EINTR) { + rb_thread_polling(); + goto retry; + } + return -1; + } + if (result == 0) { + if (oflags & WNOHANG) return 0; + rb_thread_polling(); + if (rb_thread_alone()) flags = oflags; + goto retry; + } +#else /* NO_WAITPID */ + if (pid_tbl && st_lookup(pid_tbl, pid, (st_data_t *)st)) { + last_status_set(*st, pid); + st_delete(pid_tbl, (st_data_t*)&pid, NULL); + return pid; + } + + if (flags) { + rb_raise(rb_eArgError, "can't do waitpid with flags"); + } + + for (;;) { + TRAP_BEG; + result = wait(st); + TRAP_END; + if (result < 0) { + if (errno == EINTR) { + rb_thread_schedule(); + continue; + } + return -1; + } + if (result == pid) { + break; + } + if (!pid_tbl) + pid_tbl = st_init_numtable(); + st_insert(pid_tbl, pid, (st_data_t)st); + if (!rb_thread_alone()) rb_thread_schedule(); + } +#endif + if (result > 0) { + last_status_set(*st, result); + } + return result; +} + +#ifdef NO_WAITPID +struct wait_data { + int pid; + int status; +}; + +static int +wait_each(pid, status, data) + int pid, status; + struct wait_data *data; +{ + if (data->status != -1) return ST_STOP; + + data->pid = pid; + data->status = status; + return ST_DELETE; +} + +static int +waitall_each(pid, status, ary) + int pid, status; + VALUE ary; +{ + last_status_set(status, pid); + 
rb_ary_push(ary, rb_assoc_new(INT2NUM(pid), rb_last_status)); + return ST_DELETE; +} +#endif + + +/* [MG]:FIXME: I wasn't sure how this should be done, since ::wait() + has historically been documented as if it didn't take any arguments + despite the fact that it's just an alias for ::waitpid(). The way I + have it below is more truthful, but a little confusing. + + I also took the liberty of putting in the pid values, as they're + pretty useful, and it looked as if the original 'ri' output was + supposed to contain them after "[...]depending on the value of + aPid:". + + The 'ansi' and 'bs' formats of the ri output don't display the + definition list for some reason, but the plain text one does. + */ + +/* + * call-seq: + * Process.wait() => fixnum + * Process.wait(pid=-1, flags=0) => fixnum + * Process.waitpid(pid=-1, flags=0) => fixnum + * + * Waits for a child process to exit, returns its process id, and + * sets <code>$?</code> to a <code>Process::Status</code> object + * containing information on that process. Which child it waits on + * depends on the value of _pid_: + * + * > 0:: Waits for the child whose process ID equals _pid_. + * + * 0:: Waits for any child whose process group ID equals that of the + * calling process. + * + * -1:: Waits for any child process (the default if no _pid_ is + * given). + * + * < -1:: Waits for any child whose process group ID equals the absolute + * value of _pid_. + * + * The _flags_ argument may be a logical or of the flag values + * <code>Process::WNOHANG</code> (do not block if no child available) + * or <code>Process::WUNTRACED</code> (return stopped children that + * haven't been reported). Not all flags are available on all + * platforms, but a flag value of zero will work on all platforms. + * + * Calling this method raises a <code>SystemError</code> if there are + * no child processes. Not available on all platforms. 
+ * + * include Process + * fork { exit 99 } #=> 27429 + * wait #=> 27429 + * $?.exitstatus #=> 99 + * + * pid = fork { sleep 3 } #=> 27440 + * Time.now #=> Wed Apr 09 08:57:09 CDT 2003 + * waitpid(pid, Process::WNOHANG) #=> nil + * Time.now #=> Wed Apr 09 08:57:09 CDT 2003 + * waitpid(pid, 0) #=> 27440 + * Time.now #=> Wed Apr 09 08:57:12 CDT 2003 + */ + +static VALUE +proc_wait(argc, argv) + int argc; + VALUE *argv; +{ + VALUE vpid, vflags; + int pid, flags, status; + + rb_secure(2); + flags = 0; + rb_scan_args(argc, argv, "02", &vpid, &vflags); + if (argc == 0) { + pid = -1; + } + else { + pid = NUM2INT(vpid); + if (argc == 2 && !NIL_P(vflags)) { + flags = NUM2UINT(vflags); + } + } + if ((pid = rb_waitpid(pid, &status, flags)) < 0) + rb_sys_fail(0); + if (pid == 0) { + return rb_last_status = Qnil; + } + return INT2FIX(pid); +} + + +/* + * call-seq: + * Process.wait2(pid=-1, flags=0) => [pid, status] + * Process.waitpid2(pid=-1, flags=0) => [pid, status] + * + * Waits for a child process to exit (see Process::waitpid for exact + * semantics) and returns an array containing the process id and the + * exit status (a <code>Process::Status</code> object) of that + * child. Raises a <code>SystemError</code> if there are no child + * processes. + * + * Process.fork { exit 99 } #=> 27437 + * pid, status = Process.wait2 + * pid #=> 27437 + * status.exitstatus #=> 99 + */ + +static VALUE +proc_wait2(argc, argv) + int argc; + VALUE *argv; +{ + VALUE pid = proc_wait(argc, argv); + if (NIL_P(pid)) return Qnil; + return rb_assoc_new(pid, rb_last_status); +} + + +/* + * call-seq: + * Process.waitall => [ [pid1,status1], ...] + * + * Waits for all children, returning an array of + * _pid_/_status_ pairs (where _status_ is a + * <code>Process::Status</code> object). 
+ * + * fork { sleep 0.2; exit 2 } #=> 27432 + * fork { sleep 0.1; exit 1 } #=> 27433 + * fork { exit 0 } #=> 27434 + * p Process.waitall + * + * <em>produces</em>: + * + * [[27434, #<Process::Status: pid=27434,exited(0)>], + * [27433, #<Process::Status: pid=27433,exited(1)>], + * [27432, #<Process::Status: pid=27432,exited(2)>]] + */ + +static VALUE +proc_waitall() +{ + VALUE result; + int pid, status; + + rb_secure(2); + result = rb_ary_new(); +#ifdef NO_WAITPID + if (pid_tbl) { + st_foreach(pid_tbl, waitall_each, result); + } + + for (pid = -1;;) { + pid = wait(&status); + if (pid == -1) { + if (errno == ECHILD) + break; + if (errno == EINTR) { + rb_thread_schedule(); + continue; + } + rb_sys_fail(0); + } + last_status_set(status, pid); + rb_ary_push(result, rb_assoc_new(INT2NUM(pid), rb_last_status)); + } +#else + rb_last_status = Qnil; + for (pid = -1;;) { + pid = rb_waitpid(-1, &status, 0); + if (pid == -1) { + if (errno == ECHILD) + break; + rb_sys_fail(0); + } + rb_ary_push(result, rb_assoc_new(INT2NUM(pid), rb_last_status)); + } +#endif + return result; +} + +static VALUE +detach_process_watcher(pid_p) + int *pid_p; +{ + int cpid, status; + + for (;;) { + cpid = rb_waitpid(*pid_p, &status, WNOHANG); + if (cpid == -1) return rb_last_status; + rb_thread_sleep(1); + } +} + +VALUE +rb_detach_process(pid) + int pid; +{ + return rb_thread_create(detach_process_watcher, (void*)&pid); +} + + +/* + * call-seq: + * Process.detach(pid) => thread + * + * Some operating systems retain the status of terminated child + * processes until the parent collects that status (normally using + * some variant of <code>wait()</code>. If the parent never collects + * this status, the child stays around as a <em>zombie</em> process. + * <code>Process::detach</code> prevents this by setting up a + * separate Ruby thread whose sole job is to reap the status of the + * process _pid_ when it terminates. 
Use <code>detach</code> + * only when you do not intent to explicitly wait for the child to + * terminate. <code>detach</code> only checks the status + * periodically (currently once each second). + * + * The waiting thread returns the exit status of the detached process + * when it terminates, so you can use <code>Thread#join</code> to + * know the result. If specified _pid_ is not a valid child process + * ID, the thread returns +nil+ immediately. + * + * In this first example, we don't reap the first child process, so + * it appears as a zombie in the process status display. + * + * p1 = fork { sleep 0.1 } + * p2 = fork { sleep 0.2 } + * Process.waitpid(p2) + * sleep 2 + * system("ps -ho pid,state -p #{p1}") + * + * <em>produces:</em> + * + * 27389 Z + * + * In the next example, <code>Process::detach</code> is used to reap + * the child automatically. + * + * p1 = fork { sleep 0.1 } + * p2 = fork { sleep 0.2 } + * Process.detach(p1) + * Process.waitpid(p2) + * sleep 2 + * system("ps -ho pid,state -p #{p1}") + * + * <em>(produces no output)</em> + */ + +static VALUE +proc_detach(obj, pid) + VALUE pid; +{ + rb_secure(2); + return rb_detach_process(NUM2INT(pid)); +} + +#ifndef HAVE_STRING_H +char *strtok(); +#endif + +#ifdef HAVE_SETITIMER +#define before_exec() rb_thread_stop_timer() +#define after_exec() rb_thread_start_timer() +#else +#define before_exec() +#define after_exec() +#endif + +extern char *dln_find_exe(); + +static void +security(str) + const char *str; +{ + if (rb_env_path_tainted()) { + if (rb_safe_level() > 0) { + rb_raise(rb_eSecurityError, "Insecure PATH - %s", str); + } + } +} + +static int +proc_exec_v(argv, prog) + char **argv; + const char *prog; +{ + if (!prog) + prog = argv[0]; + security(prog); + prog = dln_find_exe(prog, 0); + if (!prog) { + errno = ENOENT; + return -1; + } + +#if (defined(MSDOS) && !defined(DJGPP)) || defined(__human68k__) || defined(__EMX__) || defined(OS2) + { +#if defined(__human68k__) +#define COMMAND "command.x" 
+#endif +#if defined(__EMX__) || defined(OS2) /* OS/2 emx */ +#define COMMAND "cmd.exe" +#endif +#if (defined(MSDOS) && !defined(DJGPP)) +#define COMMAND "command.com" +#endif + char *extension; + + if ((extension = strrchr(prog, '.')) != NULL && strcasecmp(extension, ".bat") == 0) { + char **new_argv; + char *p; + int n; + + for (n = 0; argv[n]; n++) + /* no-op */; + new_argv = ALLOCA_N(char*, n + 2); + for (; n > 0; n--) + new_argv[n + 1] = argv[n]; + new_argv[1] = strcpy(ALLOCA_N(char, strlen(argv[0]) + 1), argv[0]); + for (p = new_argv[1]; *p != '\0'; p++) + if (*p == '/') + *p = '\\'; + new_argv[0] = COMMAND; + argv = new_argv; + prog = dln_find_exe(argv[0], 0); + if (!prog) { + errno = ENOENT; + return -1; + } + } + } +#endif /* MSDOS or __human68k__ or __EMX__ */ + before_exec(); + execv(prog, argv); + preserving_errno(after_exec()); + return -1; +} + +int +rb_proc_exec_n(argc, argv, prog) + int argc; + VALUE *argv; + const char *prog; +{ + char **args; + int i; + + args = ALLOCA_N(char*, argc+1); + for (i=0; i<argc; i++) { + args[i] = RSTRING(argv[i])->ptr; + } + args[i] = 0; + if (args[0]) { + return proc_exec_v(args, prog); + } + return -1; +} + +int +rb_proc_exec(str) + const char *str; +{ + const char *s = str; + char *ss, *t; + char **argv, **a; + + while (*str && ISSPACE(*str)) + str++; + +#ifdef _WIN32 + before_exec(); + rb_w32_spawn(P_OVERLAY, (char *)str, 0); + after_exec(); +#else + for (s=str; *s; s++) { + if (ISSPACE(*s)) { + const char *p, *nl = NULL; + for (p = s; ISSPACE(*p); p++) { + if (*p == '\n') nl = p; + } + if (!*p) break; + if (nl) s = nl; + } + if (*s != ' ' && !ISALPHA(*s) && strchr("*?{}[]<>()~&|\\$;'`\"\n",*s)) { + int status; +#if defined(MSDOS) + before_exec(); + status = system(str); + after_exec(); + if (status != -1) + exit(status); +#elif defined(__human68k__) || defined(__CYGWIN32__) || defined(__EMX__) + char *shell = dln_find_exe("sh", 0); + status = -1; + before_exec(); + if (shell) + execl(shell, "sh", "-c", str, (char 
*) NULL); + else + status = system(str); + after_exec(); + if (status != -1) + exit(status); +#else + before_exec(); + execl("/bin/sh", "sh", "-c", str, (char *)NULL); + preserving_errno(after_exec()); +#endif + return -1; + } + } + a = argv = ALLOCA_N(char*, (s-str)/2+2); + ss = ALLOCA_N(char, s-str+1); + memcpy(ss, str, s-str); + ss[s-str] = '\0'; + if (*a++ = strtok(ss, " \t")) { + while (t = strtok(NULL, " \t")) { + *a++ = t; + } + *a = NULL; + } + if (argv[0]) { + return proc_exec_v(argv, 0); + } + errno = ENOENT; +#endif /* _WIN32 */ + return -1; +} + +#if defined(_WIN32) +#define HAVE_SPAWNV 1 +#endif + +#if !defined(HAVE_FORK) && defined(HAVE_SPAWNV) +static int +proc_spawn_v(argv, prog) + char **argv; + char *prog; +{ +#if defined(_WIN32) + char *cmd = ALLOCA_N(char, rb_w32_argv_size(argv)); + if (!prog) prog = argv[0]; + return rb_w32_spawn(P_NOWAIT, rb_w32_join_argv(cmd, argv), prog); +#else + char *extension; + int status; + + if (!prog) + prog = argv[0]; + security(prog); + prog = dln_find_exe(prog, 0); + if (!prog) + return -1; + +#if defined(__human68k__) + if ((extension = strrchr(prog, '.')) != NULL && strcasecmp(extension, ".bat") == 0) { + char **new_argv; + char *p; + int n; + + for (n = 0; argv[n]; n++) + /* no-op */; + new_argv = ALLOCA_N(char*, n + 2); + for (; n > 0; n--) + new_argv[n + 1] = argv[n]; + new_argv[1] = strcpy(ALLOCA_N(char, strlen(argv[0]) + 1), argv[0]); + for (p = new_argv[1]; *p != '\0'; p++) + if (*p == '/') + *p = '\\'; + new_argv[0] = COMMAND; + argv = new_argv; + prog = dln_find_exe(argv[0], 0); + if (!prog) { + errno = ENOENT; + return -1; + } + } +#endif + before_exec(); + status = spawnv(P_WAIT, prog, argv); + last_status_set(status == -1 ? 
127 : status, 0); + after_exec(); + return status; +#endif +} + +static int +proc_spawn_n(argc, argv, prog) + int argc; + VALUE *argv; + VALUE prog; +{ + char **args; + int i; + + args = ALLOCA_N(char*, argc + 1); + for (i = 0; i < argc; i++) { + args[i] = RSTRING(argv[i])->ptr; + } + args[i] = (char*) 0; + if (args[0]) + return proc_spawn_v(args, prog ? RSTRING(prog)->ptr : 0); + return -1; +} + +#if defined(_WIN32) +#define proc_spawn(str) rb_w32_spawn(P_NOWAIT, str, 0) +#else +static int +proc_spawn(str) + char *str; +{ + char *s, *t; + char **argv, **a; + int status; + + for (s = str; *s; s++) { + if (*s != ' ' && !ISALPHA(*s) && strchr("*?{}[]<>()~&|\\$;'`\"\n",*s)) { + char *shell = dln_find_exe("sh", 0); + before_exec(); + status = shell?spawnl(P_WAIT,shell,"sh","-c",str,(char*)NULL):system(str); + last_status_set(status == -1 ? 127 : status, 0); + after_exec(); + return status; + } + } + a = argv = ALLOCA_N(char*, (s - str) / 2 + 2); + s = ALLOCA_N(char, s - str + 1); + strcpy(s, str); + if (*a++ = strtok(s, " \t")) { + while (t = strtok(NULL, " \t")) + *a++ = t; + *a = NULL; + } + return argv[0] ? proc_spawn_v(argv, 0) : -1; +} +#endif +#endif + +VALUE +rb_check_argv(argc, argv) + int argc; + VALUE *argv; +{ + VALUE tmp, prog; + int i; + + if (argc == 0) { + rb_raise(rb_eArgError, "wrong number of arguments"); + } + + prog = 0; + tmp = rb_check_array_type(argv[0]); + if (!NIL_P(tmp)) { + if (RARRAY(tmp)->len != 2) { + rb_raise(rb_eArgError, "wrong first argument"); + } + prog = RARRAY(tmp)->ptr[0]; + argv[0] = RARRAY(tmp)->ptr[1]; + SafeStringValue(prog); + } + for (i = 0; i < argc; i++) { + SafeStringValue(argv[i]); + } + security(RSTRING(prog ? prog : argv[0])->ptr); + return prog; +} + +/* + * call-seq: + * exec(command [, arg, ...]) + * + * Replaces the current process by running the given external _command_. + * If +exec+ is given a single argument, that argument is + * taken as a line that is subject to shell expansion before being + * executed. 
If multiple arguments are given, the second and subsequent + * arguments are passed as parameters to _command_ with no shell + * expansion. If the first argument is a two-element array, the first + * element is the command to be executed, and the second argument is + * used as the <code>argv[0]</code> value, which may show up in process + * listings. In MSDOS environments, the command is executed in a + * subshell; otherwise, one of the <code>exec(2)</code> system calls is + * used, so the running command may inherit some of the environment of + * the original program (including open file descriptors). + * + * Raises SystemCallError if the _command_ couldn't execute (typically + * <code>Errno::ENOENT</code> when it was not found). + * + * exec "echo *" # echoes list of files in current directory + * # never get here + * + * + * exec "echo", "*" # echoes an asterisk + * # never get here + */ + +VALUE +rb_f_exec(argc, argv) + int argc; + VALUE *argv; +{ + struct rb_exec_arg e; + VALUE prog; + + prog = rb_check_argv(argc, argv); + if (!prog && argc == 1) { + e.argc = 0; + e.argv = 0; + e.prog = RSTRING(argv[0])->ptr; + } + else { + e.argc = argc; + e.argv = argv; + e.prog = prog ? 
RSTRING(prog)->ptr : 0; + } + rb_exec(&e); + rb_sys_fail(e.prog); + return Qnil; /* dummy */ +} + +int +rb_exec(e) + const struct rb_exec_arg *e; +{ + int argc = e->argc; + VALUE *argv = e->argv; + const char *prog = e->prog; + + if (argc == 0) { + rb_proc_exec(prog); + } + else { + rb_proc_exec_n(argc, argv, prog); + } +#ifndef FD_CLOEXEC + preserving_errno({ + fprintf(stderr, "%s:%d: command not found: %s\n", + ruby_sourcefile, ruby_sourceline, prog); + }); +#endif + return -1; +} + +#ifdef HAVE_FORK +#ifdef FD_CLOEXEC +#if SIZEOF_INT == SIZEOF_LONG +#define proc_syswait (VALUE (*)_((VALUE)))rb_syswait +#else +static VALUE +proc_syswait(pid) + VALUE pid; +{ + rb_syswait((int)pid); + return Qnil; +} +#endif +#endif + +/* + * Forks child process, and returns the process ID in the parent + * process. + * + * If +status+ is given, protects from any exceptions and sets the + * jump status to it. + * + * In the child process, just returns 0 if +chfunc+ is +NULL+. + * Otherwise +chfunc+ will be called with +charg+, and then the child + * process exits with +EXIT_SUCCESS+ when it returned zero. + * + * In the case of the function is called and returns non-zero value, + * the child process exits with non-+EXIT_SUCCESS+ value (normaly + * 127). And, on the platforms where +FD_CLOEXEC+ is available, + * +errno+ is propagated to the parent process, and this function + * returns -1 in the parent process. On the other platforms, just + * returns pid. + * + * +chfunc+ must not raise any exceptions. 
+ */ +int +rb_fork(status, chfunc, charg) + int *status; + int (*chfunc) _((void *)); + void *charg; +{ + int pid, err, state = 0; +#ifdef FD_CLOEXEC + int ep[2]; +#endif + +#ifndef __VMS + rb_io_flush(rb_stdout); + rb_io_flush(rb_stderr); +#endif + +#ifdef FD_CLOEXEC + if (chfunc) { + if (pipe(ep)) return -1; + if (fcntl(ep[1], F_SETFD, FD_CLOEXEC)) { + preserving_errno((close(ep[0]), close(ep[1]))); + return -1; + } + } +#endif + while ((pid = fork()) < 0) { + switch (errno) { + case EAGAIN: +#if defined(EWOULDBLOCK) && EWOULDBLOCK != EAGAIN + case EWOULDBLOCK: +#endif + if (!status && !chfunc) { + rb_thread_sleep(1); + continue; + } + else { + rb_protect((VALUE (*)())rb_thread_sleep, 1, &state); + if (status) *status = state; + if (!state) continue; + } + default: +#ifdef FD_CLOEXEC + if (chfunc) { + preserving_errno((close(ep[0]), close(ep[1]))); + } +#endif + if (state && !status) rb_jump_tag(state); + return -1; + } + } + if (!pid) { + if (chfunc) { +#ifdef FD_CLOEXEC + close(ep[0]); +#endif + if (!(*chfunc)(charg)) _exit(EXIT_SUCCESS); +#ifdef FD_CLOEXEC + err = errno; + write(ep[1], &err, sizeof(err)); +#endif +#if EXIT_SUCCESS == 127 + _exit(EXIT_FAILURE); +#else + _exit(127); +#endif + } + } +#ifdef FD_CLOEXEC + else if (chfunc) { + close(ep[1]); + if ((state = read(ep[0], &err, sizeof(err))) < 0) { + err = errno; + } + close(ep[0]); + if (state) { + if (status) { + rb_protect(proc_syswait, (VALUE)pid, status); + } + else { + rb_syswait(pid); + } + errno = err; + return -1; + } + } +#endif + return pid; +} +#endif + +/* + * call-seq: + * Kernel.fork [{ block }] => fixnum or nil + * Process.fork [{ block }] => fixnum or nil + * + * Creates a subprocess. If a block is specified, that block is run + * in the subprocess, and the subprocess terminates with a status of + * zero. Otherwise, the +fork+ call returns twice, once in + * the parent, returning the process ID of the child, and once in + * the child, returning _nil_. 
The child process can exit using + * <code>Kernel.exit!</code> to avoid running any + * <code>at_exit</code> functions. The parent process should + * use <code>Process.wait</code> to collect the termination statuses + * of its children or use <code>Process.detach</code> to register + * disinterest in their status; otherwise, the operating system + * may accumulate zombie processes. + */ + +static VALUE +rb_f_fork(obj) + VALUE obj; +{ +#ifdef HAVE_FORK + int pid; + + rb_secure(2); + + switch (pid = rb_fork(0, 0, 0)) { + case 0: +#ifdef linux + after_exec(); +#endif + rb_thread_atfork(); + if (rb_block_given_p()) { + int status; + + rb_protect(rb_yield, Qundef, &status); + ruby_stop(status); + } + return Qnil; + + case -1: + rb_sys_fail("fork(2)"); + return Qnil; + + default: + return INT2FIX(pid); + } +#else + rb_notimplement(); +#endif +} + + +/* + * call-seq: + * Process.exit!(fixnum=-1) + * + * Exits the process immediately. No exit handlers are + * run. <em>fixnum</em> is returned to the underlying system as the + * exit status. 
+ * + * Process.exit!(0) + */ + +static VALUE +rb_f_exit_bang(argc, argv, obj) + int argc; + VALUE *argv; + VALUE obj; +{ + VALUE status; + int istatus; + + rb_secure(4); + if (rb_scan_args(argc, argv, "01", &status) == 1) { + switch (status) { + case Qtrue: + istatus = EXIT_SUCCESS; + break; + case Qfalse: + istatus = EXIT_FAILURE; + break; + default: + istatus = NUM2INT(status); + break; + } + } + else { + istatus = EXIT_FAILURE; + } + _exit(istatus); + + return Qnil; /* not reached */ +} + +#if defined(sun) +#define signal(a,b) sigset(a,b) +#endif + +void +rb_syswait(pid) + int pid; +{ + static int overriding; + RETSIGTYPE (*hfunc)_((int)), (*qfunc)_((int)), (*ifunc)_((int)); + int status; + int i, hooked = Qfalse; + + if (!overriding) { +#ifdef SIGHUP + hfunc = signal(SIGHUP, SIG_IGN); +#endif +#ifdef SIGQUIT + qfunc = signal(SIGQUIT, SIG_IGN); +#endif + ifunc = signal(SIGINT, SIG_IGN); + overriding = Qtrue; + hooked = Qtrue; + } + + do { + i = rb_waitpid(pid, &status, 0); + } while (i == -1 && errno == EINTR); + + if (hooked) { +#ifdef SIGHUP + signal(SIGHUP, hfunc); +#endif +#ifdef SIGQUIT + signal(SIGQUIT, qfunc); +#endif + signal(SIGINT, ifunc); + overriding = Qfalse; + } +} + +int +rb_spawn(argc, argv) + int argc; + VALUE *argv; +{ + int status; + VALUE prog; +#if defined HAVE_FORK + struct rb_exec_arg earg; +#endif + + prog = rb_check_argv(argc, argv); + + if (!prog && argc == 1) { + --argc; + prog = *argv++; + } +#if defined HAVE_FORK + earg.argc = argc; + earg.argv = argv; + earg.prog = prog ? 
RSTRING(prog)->ptr : 0; + status = rb_fork(&status, (int (*)_((void*)))rb_exec, &earg); + if (prog && argc) argv[0] = prog; +#elif defined HAVE_SPAWNV + if (!argc) { + status = proc_spawn(RSTRING(prog)->ptr); + } + else { + status = proc_spawn_n(argc, argv, prog); + } + if (prog && argc) argv[0] = prog; +#else + if (prog && argc) argv[0] = prog; + if (argc) prog = rb_ary_join(rb_ary_new4(argc, argv), rb_str_new2(" ")); + status = system(StringValuePtr(prog)); +# if defined(__human68k__) || defined(__DJGPP__) + last_status_set(status == -1 ? 127 : status, 0); +# else + last_status_set((status & 0xff) << 8, 0); +# endif +#endif + return status; +} + +/* + * call-seq: + * system(cmd [, arg, ...]) => true or false + * + * Executes _cmd_ in a subshell, returning +true+ if the command ran + * successfully, +false+ otherwise. An error status is available in + * <code>$?</code>. The arguments are processed in the same way as + * for <code>Kernel::exec</code>, and raises same exceptions as it. + * + * system("echo *") + * system("echo", "*") + * + * <em>produces:</em> + * + * config.h main.rb + * * + */ + +static VALUE +rb_f_system(argc, argv) + int argc; + VALUE *argv; +{ + int status; + + status = rb_spawn(argc, argv); + if (status == -1) rb_sys_fail(RSTRING(argv[0])->ptr); +#if defined(HAVE_FORK) || defined(HAVE_SPAWNV) + rb_syswait(status); + status = NUM2INT(rb_last_status); +#endif + if (status == EXIT_SUCCESS) return Qtrue; + return Qfalse; +} + +/* + * call-seq: + * spawn(cmd [, arg, ...]) => pid + * + * Similar to <code>Kernel::system</code> except for not waiting for + * end of _cmd_, but returns its <i>pid</i>. 
+ */ + +static VALUE +rb_f_spawn(argc, argv) + int argc; + VALUE *argv; +{ + int pid; + + pid = rb_spawn(argc, argv); + if (pid == -1) rb_sys_fail(RSTRING(argv[0])->ptr); +#if defined(HAVE_FORK) || defined(HAVE_SPAWNV) + return INT2NUM(pid); +#else + return Qnil; +#endif +} + +/* + * call-seq: + * sleep([duration]) => fixnum + * + * Suspends the current thread for _duration_ seconds (which may be + * any number, including a +Float+ with fractional seconds). Returns the actual + * number of seconds slept (rounded), which may be less than that asked + * for if the thread was interrupted by a +SIGALRM+, or if + * another thread calls <code>Thread#run</code>. Zero arguments + * causes +sleep+ to sleep forever. + * + * Time.new #=> Wed Apr 09 08:56:32 CDT 2003 + * sleep 1.2 #=> 1 + * Time.new #=> Wed Apr 09 08:56:33 CDT 2003 + * sleep 1.9 #=> 2 + * Time.new #=> Wed Apr 09 08:56:35 CDT 2003 + */ + +static VALUE +rb_f_sleep(argc, argv) + int argc; + VALUE *argv; +{ + int beg, end; + + beg = time(0); + if (argc == 0) { + rb_thread_sleep_forever(); + } + else if (argc == 1) { + rb_thread_wait_for(rb_time_interval(argv[0])); + } + else { + rb_raise(rb_eArgError, "wrong number of arguments"); + } + + end = time(0) - beg; + + return INT2FIX(end); +} + + +/* + * call-seq: + * Process.getpgrp => integer + * + * Returns the process group ID for this process. Not available on + * all platforms. + * + * Process.getpgid(0) #=> 25527 + * Process.getpgrp #=> 25527 + */ + +static VALUE +proc_getpgrp() +{ + int pgrp; + + rb_secure(2); +#if defined(HAVE_GETPGRP) && defined(GETPGRP_VOID) + pgrp = getpgrp(); + if (pgrp < 0) rb_sys_fail(0); + return INT2FIX(pgrp); +#else +# ifdef HAVE_GETPGID + pgrp = getpgid(0); + if (pgrp < 0) rb_sys_fail(0); + return INT2FIX(pgrp); +# else + rb_notimplement(); +# endif +#endif +} + + +/* + * call-seq: + * Process.setpgrp => 0 + * + * Equivalent to <code>setpgid(0,0)</code>. Not available on all + * platforms. 
+ */ + +static VALUE +proc_setpgrp() +{ + rb_secure(2); + /* check for posix setpgid() first; this matches the posix */ + /* getpgrp() above. It appears that configure will set SETPGRP_VOID */ + /* even though setpgrp(0,0) would be prefered. The posix call avoids */ + /* this confusion. */ +#ifdef HAVE_SETPGID + if (setpgid(0,0) < 0) rb_sys_fail(0); +#elif defined(HAVE_SETPGRP) && defined(SETPGRP_VOID) + if (setpgrp() < 0) rb_sys_fail(0); +#else + rb_notimplement(); +#endif + return INT2FIX(0); +} + + +/* + * call-seq: + * Process.getpgid(pid) => integer + * + * Returns the process group ID for the given process id. Not + * available on all platforms. + * + * Process.getpgid(Process.ppid()) #=> 25527 + */ + +static VALUE +proc_getpgid(obj, pid) + VALUE obj, pid; +{ +#if defined(HAVE_GETPGID) && !defined(__CHECKER__) + int i; + + rb_secure(2); + i = getpgid(NUM2INT(pid)); + if (i < 0) rb_sys_fail(0); + return INT2NUM(i); +#else + rb_notimplement(); +#endif +} + + +/* + * call-seq: + * Process.setpgid(pid, integer) => 0 + * + * Sets the process group ID of _pid_ (0 indicates this + * process) to <em>integer</em>. Not available on all platforms. + */ + +static VALUE +proc_setpgid(obj, pid, pgrp) + VALUE obj, pid, pgrp; +{ +#ifdef HAVE_SETPGID + int ipid, ipgrp; + + rb_secure(2); + ipid = NUM2INT(pid); + ipgrp = NUM2INT(pgrp); + + if (setpgid(ipid, ipgrp) < 0) rb_sys_fail(0); + return INT2FIX(0); +#else + rb_notimplement(); +#endif +} + + +/* + * call-seq: + * Process.setsid => fixnum + * + * Establishes this process as a new session and process group + * leader, with no controlling tty. Returns the session id. Not + * available on all platforms. 
+ * + * Process.setsid #=> 27422 + */ + +static VALUE +proc_setsid() +{ +#if defined(HAVE_SETSID) + int pid; + + rb_secure(2); + pid = setsid(); + if (pid < 0) rb_sys_fail(0); + return INT2FIX(pid); +#elif defined(HAVE_SETPGRP) && defined(TIOCNOTTY) + pid_t pid; + int ret; + + rb_secure(2); + pid = getpid(); +#if defined(SETPGRP_VOID) + ret = setpgrp(); + /* If `pid_t setpgrp(void)' is equivalent to setsid(), + `ret' will be the same value as `pid', and following open() will fail. + In Linux, `int setpgrp(void)' is equivalent to setpgid(0, 0). */ +#else + ret = setpgrp(0, pid); +#endif + if (ret == -1) rb_sys_fail(0); + + if ((fd = open("/dev/tty", O_RDWR)) >= 0) { + ioctl(fd, TIOCNOTTY, NULL); + close(fd); + } + return INT2FIX(pid); +#else + rb_notimplement(); +#endif +} + + +/* + * call-seq: + * Process.getpriority(kind, integer) => fixnum + * + * Gets the scheduling priority for specified process, process group, + * or user. <em>kind</em> indicates the kind of entity to find: one + * of <code>Process::PRIO_PGRP</code>, + * <code>Process::PRIO_USER</code>, or + * <code>Process::PRIO_PROCESS</code>. _integer_ is an id + * indicating the particular process, process group, or user (an id + * of 0 means _current_). Lower priorities are more favorable + * for scheduling. Not available on all platforms. + * + * Process.getpriority(Process::PRIO_USER, 0) #=> 19 + * Process.getpriority(Process::PRIO_PROCESS, 0) #=> 19 + */ + +static VALUE +proc_getpriority(obj, which, who) + VALUE obj, which, who; +{ +#ifdef HAVE_GETPRIORITY + int prio, iwhich, iwho; + + rb_secure(2); + iwhich = NUM2INT(which); + iwho = NUM2INT(who); + + errno = 0; + prio = getpriority(iwhich, iwho); + if (errno) rb_sys_fail(0); + return INT2FIX(prio); +#else + rb_notimplement(); +#endif +} + + +/* + * call-seq: + * Process.setpriority(kind, integer, priority) => 0 + * + * See <code>Process#getpriority</code>. 
+ * + * Process.setpriority(Process::PRIO_USER, 0, 19) #=> 0 + * Process.setpriority(Process::PRIO_PROCESS, 0, 19) #=> 0 + * Process.getpriority(Process::PRIO_USER, 0) #=> 19 + * Process.getpriority(Process::PRIO_PROCESS, 0) #=> 19 + */ + +static VALUE +proc_setpriority(obj, which, who, prio) + VALUE obj, which, who, prio; +{ +#ifdef HAVE_GETPRIORITY + int iwhich, iwho, iprio; + + rb_secure(2); + iwhich = NUM2INT(which); + iwho = NUM2INT(who); + iprio = NUM2INT(prio); + + if (setpriority(iwhich, iwho, iprio) < 0) + rb_sys_fail(0); + return INT2FIX(0); +#else + rb_notimplement(); +#endif +} + +#if SIZEOF_RLIM_T == SIZEOF_INT +# define RLIM2NUM(v) UINT2NUM(v) +# define NUM2RLIM(v) NUM2UINT(v) +#elif SIZEOF_RLIM_T == SIZEOF_LONG +# define RLIM2NUM(v) ULONG2NUM(v) +# define NUM2RLIM(v) NUM2ULONG(v) +#elif SIZEOF_RLIM_T == SIZEOF_LONG_LONG +# define RLIM2NUM(v) ULL2NUM(v) +# define NUM2RLIM(v) NUM2ULL(v) +#endif + +/* + * call-seq: + * Process.getrlimit(resource) => [cur_limit, max_limit] + * + * Gets the resource limit of the process. + * _cur_limit_ means current (soft) limit and + * _max_limit_ means maximum (hard) limit. + * + * _resource_ indicates the kind of resource to limit: + * such as <code>Process::RLIMIT_CORE</code>, + * <code>Process::RLIMIT_CPU</code>, etc. + * See Process.setrlimit for details. + * + * _cur_limit_ and _max_limit_ may be <code>Process::RLIM_INFINITY</code>, + * <code>Process::RLIM_SAVED_MAX</code> or + * <code>Process::RLIM_SAVED_CUR</code>. + * See Process.setrlimit and the system getrlimit(2) manual for details. 
+ */ + +static VALUE +proc_getrlimit(VALUE obj, VALUE resource) +{ +#if defined(HAVE_GETRLIMIT) && defined(RLIM2NUM) + struct rlimit rlim; + + rb_secure(2); + + if (getrlimit(NUM2INT(resource), &rlim) < 0) { + rb_sys_fail("getrlimit"); + } + return rb_assoc_new(RLIM2NUM(rlim.rlim_cur), RLIM2NUM(rlim.rlim_max)); +#else + rb_notimplement(); +#endif +} + +/* + * call-seq: + * Process.setrlimit(resource, cur_limit, max_limit) => nil + * + * Sets the resource limit of the process. + * _cur_limit_ means current (soft) limit and + * _max_limit_ means maximum (hard) limit. + * + * _resource_ indicates the kind of resource to limit. + * Although the list of resources are OS dependent, + * SUSv3 defines following resources. + * + * [Process::RLIMIT_CORE] core size (bytes) + * [Process::RLIMIT_CPU] CPU time (seconds) + * [Process::RLIMIT_DATA] data segment (bytes) + * [Process::RLIMIT_FSIZE] file size (bytes) + * [Process::RLIMIT_NOFILE] file descriptors (number) + * [Process::RLIMIT_STACK] stack size (bytes) + * [Process::RLIMIT_AS] total available memory (bytes) + * + * Other <code>Process::RLIMIT_???</code> constants may be defined. + * + * _cur_limit_ and _max_limit_ may be <code>Process::RLIM_INFINITY</code>, + * which means that the resource is not limited. + * They may be <code>Process::RLIM_SAVED_MAX</code> or + * <code>Process::RLIM_SAVED_CUR</code> too. + * See system setrlimit(2) manual for details. 
+ * + */ + +static VALUE +proc_setrlimit(VALUE obj, VALUE resource, VALUE rlim_cur, VALUE rlim_max) +{ +#if defined(HAVE_SETRLIMIT) && defined(NUM2RLIM) + struct rlimit rlim; + + rb_secure(2); + + rlim.rlim_cur = NUM2RLIM(rlim_cur); + rlim.rlim_max = NUM2RLIM(rlim_max); + + if (setrlimit(NUM2INT(resource), &rlim) < 0) { + rb_sys_fail("setrlimit"); + } + return Qnil; +#else + rb_notimplement(); +#endif +} + +static int under_uid_switch = 0; +static void +check_uid_switch() +{ + rb_secure(2); + if (under_uid_switch) { + rb_raise(rb_eRuntimeError, "can't handle UID while evaluating block given to Process::UID.switch method"); + } +} + +static int under_gid_switch = 0; +static void +check_gid_switch() +{ + rb_secure(2); + if (under_gid_switch) { + rb_raise(rb_eRuntimeError, "can't handle GID while evaluating block given to Process::UID.switch method"); + } +} + + +/********************************************************************* + * Document-class: Process::Sys + * + * The <code>Process::Sys</code> module contains UID and GID + * functions which provide direct bindings to the system calls of the + * same names instead of the more-portable versions of the same + * functionality found in the <code>Process</code>, + * <code>Process::UID</code>, and <code>Process::GID</code> modules. + */ + + +/* + * call-seq: + * Process::Sys.setuid(integer) => nil + * + * Set the user ID of the current process to _integer_. Not + * available on all platforms. + * + */ + +static VALUE +p_sys_setuid(obj, id) + VALUE obj, id; +{ +#if defined HAVE_SETUID + check_uid_switch(); + if (setuid(NUM2INT(id)) != 0) rb_sys_fail(0); +#else + rb_notimplement(); +#endif + return Qnil; +} + + + +/* + * call-seq: + * Process::Sys.setruid(integer) => nil + * + * Set the real user ID of the calling process to _integer_. + * Not available on all platforms. 
+ * + */ + +static VALUE +p_sys_setruid(obj, id) + VALUE obj, id; +{ +#if defined HAVE_SETRUID + check_uid_switch(); + if (setruid(NUM2INT(id)) != 0) rb_sys_fail(0); +#else + rb_notimplement(); +#endif + return Qnil; +} + + +/* + * call-seq: + * Process::Sys.seteuid(integer) => nil + * + * Set the effective user ID of the calling process to + * _integer_. Not available on all platforms. + * + */ + +static VALUE +p_sys_seteuid(obj, id) + VALUE obj, id; +{ +#if defined HAVE_SETEUID + check_uid_switch(); + if (seteuid(NUM2INT(id)) != 0) rb_sys_fail(0); +#else + rb_notimplement(); +#endif + return Qnil; +} + + +/* + * call-seq: + * Process::Sys.setreuid(rid, eid) => nil + * + * Sets the (integer) real and/or effective user IDs of the current + * process to _rid_ and _eid_, respectively. A value of + * <code>-1</code> for either means to leave that ID unchanged. Not + * available on all platforms. + * + */ + +static VALUE +p_sys_setreuid(obj, rid, eid) + VALUE obj, rid, eid; +{ +#if defined HAVE_SETREUID + check_uid_switch(); + if (setreuid(NUM2INT(rid),NUM2INT(eid)) != 0) rb_sys_fail(0); +#else + rb_notimplement(); +#endif + return Qnil; +} + + +/* + * call-seq: + * Process::Sys.setresuid(rid, eid, sid) => nil + * + * Sets the (integer) real, effective, and saved user IDs of the + * current process to _rid_, _eid_, and _sid_ respectively. A + * value of <code>-1</code> for any value means to + * leave that ID unchanged. Not available on all platforms. + * + */ + +static VALUE +p_sys_setresuid(obj, rid, eid, sid) + VALUE obj, rid, eid, sid; +{ +#if defined HAVE_SETRESUID + check_uid_switch(); + if (setresuid(NUM2INT(rid),NUM2INT(eid),NUM2INT(sid)) != 0) rb_sys_fail(0); +#else + rb_notimplement(); +#endif + return Qnil; +} + + +/* + * call-seq: + * Process.uid => fixnum + * Process::UID.rid => fixnum + * Process::Sys.getuid => fixnum + * + * Returns the (real) user ID of this process. 
+ * + * Process.uid #=> 501 + */ + +static VALUE +proc_getuid(obj) + VALUE obj; +{ + int uid = getuid(); + return INT2FIX(uid); +} + + +/* + * call-seq: + * Process.uid= integer => numeric + * + * Sets the (integer) user ID for this process. Not available on all + * platforms. + */ + +static VALUE +proc_setuid(obj, id) + VALUE obj, id; +{ + int uid = NUM2INT(id); + + check_uid_switch(); +#if defined(HAVE_SETRESUID) && !defined(__CHECKER__) + if (setresuid(uid, -1, -1) < 0) rb_sys_fail(0); +#elif defined HAVE_SETREUID + if (setreuid(uid, -1) < 0) rb_sys_fail(0); +#elif defined HAVE_SETRUID + if (setruid(uid) < 0) rb_sys_fail(0); +#elif defined HAVE_SETUID + { + if (geteuid() == uid) { + if (setuid(uid) < 0) rb_sys_fail(0); + } + else { + rb_notimplement(); + } + } +#else + rb_notimplement(); +#endif + return INT2FIX(uid); +} + + +/******************************************************************** + * + * Document-class: Process::UID + * + * The <code>Process::UID</code> module contains a collection of + * module functions which can be used to portably get, set, and + * switch the current process's real, effective, and saved user IDs. + * + */ + +static int SAVED_USER_ID; + + +/* + * call-seq: + * Process::UID.change_privilege(integer) => fixnum + * + * Change the current process's real and effective user ID to that + * specified by _integer_. Returns the new user ID. Not + * available on all platforms. 
 *
 *     [Process.uid, Process.euid]          #=> [0, 0]
 *     Process::UID.change_privilege(31)    #=> 31
 *     [Process.uid, Process.euid]          #=> [31, 31]
 */

static VALUE
p_uid_change_privilege(obj, id)
    VALUE obj, id;
{
    extern int errno;
    int uid;

    check_uid_switch();

    uid = NUM2INT(id);

    /* Two top-level cases, split on the effective UID.  Inside each, the
     * first available family of set*uid calls is used.  SAVED_USER_ID is
     * updated after each step that changes the saved ID being tracked. */
    if (geteuid() == 0) { /* root-user */
#if defined(HAVE_SETRESUID)
        if (setresuid(uid, uid, uid) < 0) rb_sys_fail(0);
        SAVED_USER_ID = uid;
#elif defined(HAVE_SETUID)
        if (setuid(uid) < 0) rb_sys_fail(0);
        SAVED_USER_ID = uid;
#elif defined(HAVE_SETREUID) && !defined(OBSOLETE_SETREUID)
        if (getuid() == uid) {
            if (SAVED_USER_ID == uid) {
                if (setreuid(-1, uid) < 0) rb_sys_fail(0);
            } else {
                if (uid == 0) { /* (r,e,s) == (root, root, x) */
                    if (setreuid(-1, SAVED_USER_ID) < 0) rb_sys_fail(0);
                    if (setreuid(SAVED_USER_ID, 0) < 0) rb_sys_fail(0);
                    SAVED_USER_ID = 0; /* (r,e,s) == (x, root, root) */
                    if (setreuid(uid, uid) < 0) rb_sys_fail(0);
                    SAVED_USER_ID = uid;
                } else {
                    if (setreuid(0, -1) < 0) rb_sys_fail(0);
                    SAVED_USER_ID = 0;
                    if (setreuid(uid, uid) < 0) rb_sys_fail(0);
                    SAVED_USER_ID = uid;
                }
            }
        } else {
            if (setreuid(uid, uid) < 0) rb_sys_fail(0);
            SAVED_USER_ID = uid;
        }
#elif defined(HAVE_SETRUID) && defined(HAVE_SETEUID)
        if (getuid() == uid) {
            if (SAVED_USER_ID == uid) {
                if (seteuid(uid) < 0) rb_sys_fail(0);
            } else {
                if (uid == 0) {
                    if (setruid(SAVED_USER_ID) < 0) rb_sys_fail(0);
                    SAVED_USER_ID = 0;
                    if (setruid(0) < 0) rb_sys_fail(0);
                } else {
                    if (setruid(0) < 0) rb_sys_fail(0);
                    SAVED_USER_ID = 0;
                    if (seteuid(uid) < 0) rb_sys_fail(0);
                    if (setruid(uid) < 0) rb_sys_fail(0);
                    SAVED_USER_ID = uid;
                }
            }
        } else {
            if (seteuid(uid) < 0) rb_sys_fail(0);
            if (setruid(uid) < 0) rb_sys_fail(0);
            SAVED_USER_ID = uid;
        }
#else
        rb_notimplement();
#endif
    } else { /* unprivileged user */
#if defined(HAVE_SETRESUID)
        /* pass -1 ("leave unchanged") for each ID that already matches */
        if (setresuid((getuid() == uid)? -1: uid,
                      (geteuid() == uid)? -1: uid,
                      (SAVED_USER_ID == uid)? -1: uid) < 0) rb_sys_fail(0);
        SAVED_USER_ID = uid;
#elif defined(HAVE_SETREUID) && !defined(OBSOLETE_SETREUID)
        if (SAVED_USER_ID == uid) {
            if (setreuid((getuid() == uid)? -1: uid,
                         (geteuid() == uid)? -1: uid) < 0) rb_sys_fail(0);
        } else if (getuid() != uid) {
            if (setreuid(uid, (geteuid() == uid)? -1: uid) < 0) rb_sys_fail(0);
            SAVED_USER_ID = uid;
        } else if (/* getuid() == uid && */ geteuid() != uid) {
            if (setreuid(geteuid(), uid) < 0) rb_sys_fail(0);
            SAVED_USER_ID = uid;
            if (setreuid(uid, -1) < 0) rb_sys_fail(0);
        } else { /* getuid() == uid && geteuid() == uid */
            if (setreuid(-1, SAVED_USER_ID) < 0) rb_sys_fail(0);
            if (setreuid(SAVED_USER_ID, uid) < 0) rb_sys_fail(0);
            SAVED_USER_ID = uid;
            if (setreuid(uid, -1) < 0) rb_sys_fail(0);
        }
#elif defined(HAVE_SETRUID) && defined(HAVE_SETEUID)
        if (SAVED_USER_ID == uid) {
            if (geteuid() != uid && seteuid(uid) < 0) rb_sys_fail(0);
            if (getuid() != uid && setruid(uid) < 0) rb_sys_fail(0);
        } else if (/* SAVED_USER_ID != uid && */ geteuid() == uid) {
            if (getuid() != uid) {
                if (setruid(uid) < 0) rb_sys_fail(0);
                SAVED_USER_ID = uid;
            } else {
                if (setruid(SAVED_USER_ID) < 0) rb_sys_fail(0);
                SAVED_USER_ID = uid;
                if (setruid(uid) < 0) rb_sys_fail(0);
            }
        } else if (/* geteuid() != uid && */ getuid() == uid) {
            if (seteuid(uid) < 0) rb_sys_fail(0);
            if (setruid(SAVED_USER_ID) < 0) rb_sys_fail(0);
            SAVED_USER_ID = uid;
            if (setruid(uid) < 0) rb_sys_fail(0);
        } else {
            /* none of real, effective or saved ID matches: refuse */
            errno = EPERM;
            rb_sys_fail(0);
        }
#elif defined HAVE_44BSD_SETUID
        if (getuid() == uid) {
            /* (r,e,s)==(uid,?,?) ==> (uid,uid,uid) */
            if (setuid(uid) < 0) rb_sys_fail(0);
            SAVED_USER_ID = uid;
        } else {
            errno = EPERM;
            rb_sys_fail(0);
        }
#elif defined HAVE_SETEUID
        if (getuid() == uid && SAVED_USER_ID == uid) {
            if (seteuid(uid) < 0) rb_sys_fail(0);
        } else {
            errno = EPERM;
            rb_sys_fail(0);
        }
#elif defined HAVE_SETUID
        if (getuid() == uid && SAVED_USER_ID == uid) {
            if (setuid(uid) < 0) rb_sys_fail(0);
        } else {
            errno = EPERM;
            rb_sys_fail(0);
        }
#else
        rb_notimplement();
#endif
    }
    return INT2FIX(uid);
}



/*
 *  call-seq:
 *     Process::Sys.setgid(integer)   => nil
 *
 *  Set the group ID of the current process to _integer_. Not
 *  available on all platforms.
 *
 */

static VALUE
p_sys_setgid(obj, id)
    VALUE obj, id;
{
#if defined HAVE_SETGID
    /* refuse while a GID switch block is being evaluated */
    check_gid_switch();
    if (setgid(NUM2INT(id)) != 0) rb_sys_fail(0);
#else
    rb_notimplement();
#endif
    return Qnil;
}


/*
 *  call-seq:
 *     Process::Sys.setrgid(integer)   => nil
 *
 *  Set the real group ID of the calling process to _integer_.
 *  Not available on all platforms.
 *
 */

static VALUE
p_sys_setrgid(obj, id)
    VALUE obj, id;
{
#if defined HAVE_SETRGID
    /* refuse while a GID switch block is being evaluated */
    check_gid_switch();
    if (setrgid(NUM2INT(id)) != 0) rb_sys_fail(0);
#else
    rb_notimplement();
#endif
    return Qnil;
}



/*
 *  call-seq:
 *     Process::Sys.setegid(integer)   => nil
 *
 *  Set the effective group ID of the calling process to
 *  _integer_.  Not available on all platforms.
 *
 */

static VALUE
p_sys_setegid(obj, id)
    VALUE obj, id;
{
#if defined HAVE_SETEGID
    /* refuse while a GID switch block is being evaluated */
    check_gid_switch();
    if (setegid(NUM2INT(id)) != 0) rb_sys_fail(0);
#else
    rb_notimplement();
#endif
    return Qnil;
}


/*
 *  call-seq:
 *     Process::Sys.setregid(rid, eid)   => nil
 *
 *  Sets the (integer) real and/or effective group IDs of the current
 *  process to <em>rid</em> and <em>eid</em>, respectively. A value of
 *  <code>-1</code> for either means to leave that ID unchanged. Not
 *  available on all platforms.
+ * + */ + +static VALUE +p_sys_setregid(obj, rid, eid) + VALUE obj, rid, eid; +{ +#if defined HAVE_SETREGID + check_gid_switch(); + if (setregid(NUM2INT(rid),NUM2INT(eid)) != 0) rb_sys_fail(0); +#else + rb_notimplement(); +#endif + return Qnil; +} + +/* + * call-seq: + * Process::Sys.setresgid(rid, eid, sid) => nil + * + * Sets the (integer) real, effective, and saved user IDs of the + * current process to <em>rid</em>, <em>eid</em>, and <em>sid</em> + * respectively. A value of <code>-1</code> for any value means to + * leave that ID unchanged. Not available on all platforms. + * + */ + +static VALUE +p_sys_setresgid(obj, rid, eid, sid) + VALUE obj, rid, eid, sid; +{ +#if defined HAVE_SETRESGID + check_gid_switch(); + if (setresgid(NUM2INT(rid),NUM2INT(eid),NUM2INT(sid)) != 0) rb_sys_fail(0); +#else + rb_notimplement(); +#endif + return Qnil; +} + + +/* + * call-seq: + * Process::Sys.issetugid => true or false + * + * Returns +true+ if the process was created as a result + * of an execve(2) system call which had either of the setuid or + * setgid bits set (and extra privileges were given as a result) or + * if it has changed any of its real, effective or saved user or + * group IDs since it began execution. + * + */ + +static VALUE +p_sys_issetugid(obj) + VALUE obj; +{ +#if defined HAVE_ISSETUGID + rb_secure(2); + if (issetugid()) { + return Qtrue; + } else { + return Qfalse; + } +#else + rb_notimplement(); + return Qnil; /* not reached */ +#endif +} + + +/* + * call-seq: + * Process.gid => fixnum + * Process::GID.rid => fixnum + * Process::Sys.getgid => fixnum + * + * Returns the (real) group ID for this process. + * + * Process.gid #=> 500 + */ + +static VALUE +proc_getgid(obj) + VALUE obj; +{ + int gid = getgid(); + return INT2FIX(gid); +} + + +/* + * call-seq: + * Process.gid= fixnum => fixnum + * + * Sets the group ID for this process. 
+ */ + +static VALUE +proc_setgid(obj, id) + VALUE obj, id; +{ + int gid = NUM2INT(id); + + check_gid_switch(); +#if defined(HAVE_SETRESGID) && !defined(__CHECKER__) + if (setresgid(gid, -1, -1) < 0) rb_sys_fail(0); +#elif defined HAVE_SETREGID + if (setregid(gid, -1) < 0) rb_sys_fail(0); +#elif defined HAVE_SETRGID + if (setrgid((GIDTYPE)gid) < 0) rb_sys_fail(0); +#elif defined HAVE_SETGID + { + if (getegid() == gid) { + if (setgid(gid) < 0) rb_sys_fail(0); + } + else { + rb_notimplement(); + } + } +#else + rb_notimplement(); +#endif + return INT2FIX(gid); +} + + +static size_t maxgroups = 32; + + +/* + * call-seq: + * Process.groups => array + * + * Get an <code>Array</code> of the gids of groups in the + * supplemental group access list for this process. + * + * Process.groups #=> [27, 6, 10, 11] + * + */ + +static VALUE +proc_getgroups(VALUE obj) +{ +#ifdef HAVE_GETGROUPS + VALUE ary; + size_t ngroups; + gid_t *groups; + int i; + + groups = ALLOCA_N(gid_t, maxgroups); + + ngroups = getgroups(maxgroups, groups); + if (ngroups == -1) + rb_sys_fail(0); + + ary = rb_ary_new(); + for (i = 0; i < ngroups; i++) + rb_ary_push(ary, INT2NUM(groups[i])); + + return ary; +#else + rb_notimplement(); + return Qnil; +#endif +} + + +/* + * call-seq: + * Process.groups= array => array + * + * Set the supplemental group access list to the given + * <code>Array</code> of group IDs. 
+ * + * Process.groups #=> [0, 1, 2, 3, 4, 6, 10, 11, 20, 26, 27] + * Process.groups = [27, 6, 10, 11] #=> [27, 6, 10, 11] + * Process.groups #=> [27, 6, 10, 11] + * + */ + +static VALUE +proc_setgroups(VALUE obj, VALUE ary) +{ +#ifdef HAVE_SETGROUPS + size_t ngroups; + gid_t *groups; + int i; + struct group *gr; + + Check_Type(ary, T_ARRAY); + + ngroups = RARRAY(ary)->len; + if (ngroups > maxgroups) + rb_raise(rb_eArgError, "too many groups, %d max", maxgroups); + + groups = ALLOCA_N(gid_t, ngroups); + + for (i = 0; i < ngroups && i < RARRAY(ary)->len; i++) { + VALUE g = RARRAY(ary)->ptr[i]; + + if (FIXNUM_P(g)) { + groups[i] = FIX2INT(g); + } + else { + VALUE tmp = rb_check_string_type(g); + + if (NIL_P(tmp)) { + groups[i] = NUM2INT(g); + } + else { + gr = getgrnam(RSTRING(tmp)->ptr); + if (gr == NULL) + rb_raise(rb_eArgError, + "can't find group for %s", RSTRING(tmp)->ptr); + groups[i] = gr->gr_gid; + } + } + } + + i = setgroups(ngroups, groups); + if (i == -1) + rb_sys_fail(0); + + return proc_getgroups(obj); +#else + rb_notimplement(); + return Qnil; +#endif +} + + +/* + * call-seq: + * Process.initgroups(username, gid) => array + * + * Initializes the supplemental group access list by reading the + * system group database and using all groups of which the given user + * is a member. The group with the specified <em>gid</em> is also + * added to the list. Returns the resulting <code>Array</code> of the + * gids of all the groups in the supplementary group access list. Not + * available on all platforms. 
+ * + * Process.groups #=> [0, 1, 2, 3, 4, 6, 10, 11, 20, 26, 27] + * Process.initgroups( "mgranger", 30 ) #=> [30, 6, 10, 11] + * Process.groups #=> [30, 6, 10, 11] + * + */ + +static VALUE +proc_initgroups(obj, uname, base_grp) + VALUE obj, uname, base_grp; +{ +#ifdef HAVE_INITGROUPS + if (initgroups(StringValuePtr(uname), (gid_t)NUM2INT(base_grp)) != 0) { + rb_sys_fail(0); + } + return proc_getgroups(obj); +#else + rb_notimplement(); + return Qnil; +#endif +} + + +/* + * call-seq: + * Process.maxgroups => fixnum + * + * Returns the maximum number of gids allowed in the supplemental + * group access list. + * + * Process.maxgroups #=> 32 + */ + +static VALUE +proc_getmaxgroups(obj) + VALUE obj; +{ + return INT2FIX(maxgroups); +} + + +/* + * call-seq: + * Process.maxgroups= fixnum => fixnum + * + * Sets the maximum number of gids allowed in the supplemental group + * access list. + */ + +static VALUE +proc_setmaxgroups(obj, val) + VALUE obj; +{ + size_t ngroups = FIX2INT(val); + + if (ngroups > 4096) + ngroups = 4096; + + maxgroups = ngroups; + + return INT2FIX(maxgroups); +} + +/* + * call-seq: + * Process.daemon() => fixnum + * Process.daemon(nochdir=nil,noclose=nil) => fixnum + * + * Detach the process from controlling terminal and run in + * the background as system daemon. Unless the argument + * nochdir is true (i.e. non false), it changes the current + * working directory to the root ("/"). Unless the argument + * noclose is true, daemon() will redirect standard input, + * standard output and standard error to /dev/null. 
+ */ + +static VALUE +proc_daemon(argc, argv) + int argc; + VALUE *argv; +{ + VALUE nochdir, noclose; + int n; + + rb_scan_args(argc, argv, "02", &nochdir, &noclose); + +#if defined(HAVE_DAEMON) + n = daemon(RTEST(nochdir), RTEST(noclose)); + if (n < 0) rb_sys_fail("daemon"); + return INT2FIX(n); +#elif defined(HAVE_FORK) + switch (rb_fork(0, 0, 0)) { + case -1: + return (-1); + case 0: + break; + default: + _exit(0); + } + + proc_setsid(); + + if (!RTEST(nochdir)) + (void)chdir("/"); + + if (!RTEST(noclose) && (n = open("/dev/null", O_RDWR, 0)) != -1) { + (void)dup2(n, 0); + (void)dup2(n, 1); + (void)dup2(n, 2); + if (n > 2) + (void)close (n); + } + return INT2FIX(0); +#else + rb_notimplement(); +#endif +} + +/******************************************************************** + * + * Document-class: Process::GID + * + * The <code>Process::GID</code> module contains a collection of + * module functions which can be used to portably get, set, and + * switch the current process's real, effective, and saved group IDs. + * + */ + +static int SAVED_GROUP_ID; + + +/* + * call-seq: + * Process::GID.change_privilege(integer) => fixnum + * + * Change the current process's real and effective group ID to that + * specified by _integer_. Returns the new group ID. Not + * available on all platforms. 
+ * + * [Process.gid, Process.egid] #=> [0, 0] + * Process::GID.change_privilege(33) #=> 33 + * [Process.gid, Process.egid] #=> [33, 33] + */ + +static VALUE +p_gid_change_privilege(obj, id) + VALUE obj, id; +{ + extern int errno; + int gid; + + check_gid_switch(); + + gid = NUM2INT(id); + + if (geteuid() == 0) { /* root-user */ +#if defined(HAVE_SETRESGID) + if (setresgid(gid, gid, gid) < 0) rb_sys_fail(0); + SAVED_GROUP_ID = gid; +#elif defined HAVE_SETGID + if (setgid(gid) < 0) rb_sys_fail(0); + SAVED_GROUP_ID = gid; +#elif defined(HAVE_SETREGID) && !defined(OBSOLETE_SETREGID) + if (getgid() == gid) { + if (SAVED_GROUP_ID == gid) { + if (setregid(-1, gid) < 0) rb_sys_fail(0); + } else { + if (gid == 0) { /* (r,e,s) == (root, y, x) */ + if (setregid(-1, SAVED_GROUP_ID) < 0) rb_sys_fail(0); + if (setregid(SAVED_GROUP_ID, 0) < 0) rb_sys_fail(0); + SAVED_GROUP_ID = 0; /* (r,e,s) == (x, root, root) */ + if (setregid(gid, gid) < 0) rb_sys_fail(0); + SAVED_GROUP_ID = gid; + } else { /* (r,e,s) == (z, y, x) */ + if (setregid(0, 0) < 0) rb_sys_fail(0); + SAVED_GROUP_ID = 0; + if (setregid(gid, gid) < 0) rb_sys_fail(0); + SAVED_GROUP_ID = gid; + } + } + } else { + if (setregid(gid, gid) < 0) rb_sys_fail(0); + SAVED_GROUP_ID = gid; + } +#elif defined(HAVE_SETRGID) && defined (HAVE_SETEGID) + if (getgid() == gid) { + if (SAVED_GROUP_ID == gid) { + if (setegid(gid) < 0) rb_sys_fail(0); + } else { + if (gid == 0) { + if (setegid(gid) < 0) rb_sys_fail(0); + if (setrgid(SAVED_GROUP_ID) < 0) rb_sys_fail(0); + SAVED_GROUP_ID = 0; + if (setrgid(0) < 0) rb_sys_fail(0); + } else { + if (setrgid(0) < 0) rb_sys_fail(0); + SAVED_GROUP_ID = 0; + if (setegid(gid) < 0) rb_sys_fail(0); + if (setrgid(gid) < 0) rb_sys_fail(0); + SAVED_GROUP_ID = gid; + } + } + } else { + if (setegid(gid) < 0) rb_sys_fail(0); + if (setrgid(gid) < 0) rb_sys_fail(0); + SAVED_GROUP_ID = gid; + } +#else + rb_notimplement(); +#endif + } else { /* unprivileged user */ +#if defined(HAVE_SETRESGID) + if 
(setresgid((getgid() == gid)? -1: gid, + (getegid() == gid)? -1: gid, + (SAVED_GROUP_ID == gid)? -1: gid) < 0) rb_sys_fail(0); + SAVED_GROUP_ID = gid; +#elif defined(HAVE_SETREGID) && !defined(OBSOLETE_SETREGID) + if (SAVED_GROUP_ID == gid) { + if (setregid((getgid() == gid)? -1: gid, + (getegid() == gid)? -1: gid) < 0) rb_sys_fail(0); + } else if (getgid() != gid) { + if (setregid(gid, (getegid() == gid)? -1: gid) < 0) rb_sys_fail(0); + SAVED_GROUP_ID = gid; + } else if (/* getgid() == gid && */ getegid() != gid) { + if (setregid(getegid(), gid) < 0) rb_sys_fail(0); + SAVED_GROUP_ID = gid; + if (setregid(gid, -1) < 0) rb_sys_fail(0); + } else { /* getgid() == gid && getegid() == gid */ + if (setregid(-1, SAVED_GROUP_ID) < 0) rb_sys_fail(0); + if (setregid(SAVED_GROUP_ID, gid) < 0) rb_sys_fail(0); + SAVED_GROUP_ID = gid; + if (setregid(gid, -1) < 0) rb_sys_fail(0); + } +#elif defined(HAVE_SETRGID) && defined(HAVE_SETEGID) + if (SAVED_GROUP_ID == gid) { + if (getegid() != gid && setegid(gid) < 0) rb_sys_fail(0); + if (getgid() != gid && setrgid(gid) < 0) rb_sys_fail(0); + } else if (/* SAVED_GROUP_ID != gid && */ getegid() == gid) { + if (getgid() != gid) { + if (setrgid(gid) < 0) rb_sys_fail(0); + SAVED_GROUP_ID = gid; + } else { + if (setrgid(SAVED_GROUP_ID) < 0) rb_sys_fail(0); + SAVED_GROUP_ID = gid; + if (setrgid(gid) < 0) rb_sys_fail(0); + } + } else if (/* getegid() != gid && */ getgid() == gid) { + if (setegid(gid) < 0) rb_sys_fail(0); + if (setrgid(SAVED_GROUP_ID) < 0) rb_sys_fail(0); + SAVED_GROUP_ID = gid; + if (setrgid(gid) < 0) rb_sys_fail(0); + } else { + errno = EPERM; + rb_sys_fail(0); + } +#elif defined HAVE_44BSD_SETGID + if (getgid() == gid) { + /* (r,e,s)==(gid,?,?) 
==> (gid,gid,gid) */ + if (setgid(gid) < 0) rb_sys_fail(0); + SAVED_GROUP_ID = gid; + } else { + errno = EPERM; + rb_sys_fail(0); + } +#elif defined HAVE_SETEGID + if (getgid() == gid && SAVED_GROUP_ID == gid) { + if (setegid(gid) < 0) rb_sys_fail(0); + } else { + errno = EPERM; + rb_sys_fail(0); + } +#elif defined HAVE_SETGID + if (getgid() == gid && SAVED_GROUP_ID == gid) { + if (setgid(gid) < 0) rb_sys_fail(0); + } else { + errno = EPERM; + rb_sys_fail(0); + } +#else + rb_notimplement(); +#endif + } + return INT2FIX(gid); +} + + +/* + * call-seq: + * Process.euid => fixnum + * Process::UID.eid => fixnum + * Process::Sys.geteuid => fixnum + * + * Returns the effective user ID for this process. + * + * Process.euid #=> 501 + */ + +static VALUE +proc_geteuid(obj) + VALUE obj; +{ + int euid = geteuid(); + return INT2FIX(euid); +} + + +/* + * call-seq: + * Process.euid= integer + * + * Sets the effective user ID for this process. Not available on all + * platforms. + */ + +static VALUE +proc_seteuid(obj, euid) + VALUE obj, euid; +{ + check_uid_switch(); +#if defined(HAVE_SETRESUID) && !defined(__CHECKER__) + if (setresuid(-1, NUM2INT(euid), -1) < 0) rb_sys_fail(0); +#elif defined HAVE_SETREUID + if (setreuid(-1, NUM2INT(euid)) < 0) rb_sys_fail(0); +#elif defined HAVE_SETEUID + if (seteuid(NUM2INT(euid)) < 0) rb_sys_fail(0); +#elif defined HAVE_SETUID + euid = NUM2INT(euid); + if (euid == getuid()) { + if (setuid(euid) < 0) rb_sys_fail(0); + } + else { + rb_notimplement(); + } +#else + rb_notimplement(); +#endif + return euid; +} + +static VALUE +rb_seteuid_core(euid) + int euid; +{ + int uid; + + check_uid_switch(); + + uid = getuid(); + +#if defined(HAVE_SETRESUID) && !defined(__CHECKER__) + if (uid != euid) { + if (setresuid(-1,euid,euid) < 0) rb_sys_fail(0); + SAVED_USER_ID = euid; + } else { + if (setresuid(-1,euid,-1) < 0) rb_sys_fail(0); + } +#elif defined(HAVE_SETREUID) && !defined(OBSOLETE_SETREUID) + if (setreuid(-1, euid) < 0) rb_sys_fail(0); + if (uid != 
euid) { + if (setreuid(euid,uid) < 0) rb_sys_fail(0); + if (setreuid(uid,euid) < 0) rb_sys_fail(0); + SAVED_USER_ID = euid; + } +#elif defined HAVE_SETEUID + if (seteuid(euid) < 0) rb_sys_fail(0); +#elif defined HAVE_SETUID + if (geteuid() == 0) rb_sys_fail(0); + if (setuid(euid) < 0) rb_sys_fail(0); +#else + rb_notimplement(); +#endif + return INT2FIX(euid); +} + + +/* + * call-seq: + * Process::UID.grant_privilege(integer) => fixnum + * Process::UID.eid= integer => fixnum + * + * Set the effective user ID, and if possible, the saved user ID of + * the process to the given _integer_. Returns the new + * effective user ID. Not available on all platforms. + * + * [Process.uid, Process.euid] #=> [0, 0] + * Process::UID.grant_privilege(31) #=> 31 + * [Process.uid, Process.euid] #=> [0, 31] + */ + +static VALUE +p_uid_grant_privilege(obj, id) + VALUE obj, id; +{ + return rb_seteuid_core(NUM2INT(id)); +} + + +/* + * call-seq: + * Process.egid => fixnum + * Process::GID.eid => fixnum + * Process::Sys.geteid => fixnum + * + * Returns the effective group ID for this process. Not available on + * all platforms. + * + * Process.egid #=> 500 + */ + +static VALUE +proc_getegid(obj) + VALUE obj; +{ + int egid = getegid(); + + return INT2FIX(egid); +} + + +/* + * call-seq: + * Process.egid = fixnum => fixnum + * + * Sets the effective group ID for this process. Not available on all + * platforms. 
+ */ + +static VALUE +proc_setegid(obj, egid) + VALUE obj, egid; +{ + check_gid_switch(); + +#if defined(HAVE_SETRESGID) && !defined(__CHECKER__) + if (setresgid(-1, NUM2INT(egid), -1) < 0) rb_sys_fail(0); +#elif defined HAVE_SETREGID + if (setregid(-1, NUM2INT(egid)) < 0) rb_sys_fail(0); +#elif defined HAVE_SETEGID + if (setegid(NUM2INT(egid)) < 0) rb_sys_fail(0); +#elif defined HAVE_SETGID + egid = NUM2INT(egid); + if (egid == getgid()) { + if (setgid(egid) < 0) rb_sys_fail(0); + } + else { + rb_notimplement(); + } +#else + rb_notimplement(); +#endif + return egid; +} + +static VALUE +rb_setegid_core(egid) + int egid; +{ + int gid; + + check_gid_switch(); + + gid = getgid(); + +#if defined(HAVE_SETRESGID) && !defined(__CHECKER__) + if (gid != egid) { + if (setresgid(-1,egid,egid) < 0) rb_sys_fail(0); + SAVED_GROUP_ID = egid; + } else { + if (setresgid(-1,egid,-1) < 0) rb_sys_fail(0); + } +#elif defined(HAVE_SETREGID) && !defined(OBSOLETE_SETREGID) + if (setregid(-1, egid) < 0) rb_sys_fail(0); + if (gid != egid) { + if (setregid(egid,gid) < 0) rb_sys_fail(0); + if (setregid(gid,egid) < 0) rb_sys_fail(0); + SAVED_GROUP_ID = egid; + } +#elif defined HAVE_SETEGID + if (setegid(egid) < 0) rb_sys_fail(0); +#elif defined HAVE_SETGID + if (geteuid() == 0 /* root user */) rb_sys_fail(0); + if (setgid(egid) < 0) rb_sys_fail(0); +#else + rb_notimplement(); +#endif + return INT2FIX(egid); +} + + +/* + * call-seq: + * Process::GID.grant_privilege(integer) => fixnum + * Process::GID.eid = integer => fixnum + * + * Set the effective group ID, and if possible, the saved group ID of + * the process to the given _integer_. Returns the new + * effective group ID. Not available on all platforms. 
+ * + * [Process.gid, Process.egid] #=> [0, 0] + * Process::GID.grant_privilege(31) #=> 33 + * [Process.gid, Process.egid] #=> [0, 33] + */ + +static VALUE +p_gid_grant_privilege(obj, id) + VALUE obj, id; +{ + return rb_setegid_core(NUM2INT(id)); +} + + +/* + * call-seq: + * Process::UID.re_exchangeable? => true or false + * + * Returns +true+ if the real and effective user IDs of a + * process may be exchanged on the current platform. + * + */ + +static VALUE +p_uid_exchangeable() +{ +#if defined(HAVE_SETRESUID) && !defined(__CHECKER__) + return Qtrue; +#elif defined(HAVE_SETREUID) && !defined(OBSOLETE_SETREUID) + return Qtrue; +#else + return Qfalse; +#endif +} + + +/* + * call-seq: + * Process::UID.re_exchange => fixnum + * + * Exchange real and effective user IDs and return the new effective + * user ID. Not available on all platforms. + * + * [Process.uid, Process.euid] #=> [0, 31] + * Process::UID.re_exchange #=> 0 + * [Process.uid, Process.euid] #=> [31, 0] + */ + +static VALUE +p_uid_exchange(obj) + VALUE obj; +{ + int uid, euid; + + check_uid_switch(); + + uid = getuid(); + euid = geteuid(); + +#if defined(HAVE_SETRESUID) && !defined(__CHECKER__) + if (setresuid(euid, uid, uid) < 0) rb_sys_fail(0); + SAVED_USER_ID = uid; +#elif defined(HAVE_SETREUID) && !defined(OBSOLETE_SETREUID) + if (setreuid(euid,uid) < 0) rb_sys_fail(0); + SAVED_USER_ID = uid; +#else + rb_notimplement(); +#endif + return INT2FIX(uid); +} + + +/* + * call-seq: + * Process::GID.re_exchangeable? => true or false + * + * Returns +true+ if the real and effective group IDs of a + * process may be exchanged on the current platform. 
+ * + */ + +static VALUE +p_gid_exchangeable() +{ +#if defined(HAVE_SETRESGID) && !defined(__CHECKER__) + return Qtrue; +#elif defined(HAVE_SETREGID) && !defined(OBSOLETE_SETREGID) + return Qtrue; +#else + return Qfalse; +#endif +} + + +/* + * call-seq: + * Process::GID.re_exchange => fixnum + * + * Exchange real and effective group IDs and return the new effective + * group ID. Not available on all platforms. + * + * [Process.gid, Process.egid] #=> [0, 33] + * Process::GID.re_exchange #=> 0 + * [Process.gid, Process.egid] #=> [33, 0] + */ + +static VALUE +p_gid_exchange(obj) + VALUE obj; +{ + int gid, egid; + + check_gid_switch(); + + gid = getgid(); + egid = getegid(); + +#if defined(HAVE_SETRESGID) && !defined(__CHECKER__) + if (setresgid(egid, gid, gid) < 0) rb_sys_fail(0); + SAVED_GROUP_ID = gid; +#elif defined(HAVE_SETREGID) && !defined(OBSOLETE_SETREGID) + if (setregid(egid,gid) < 0) rb_sys_fail(0); + SAVED_GROUP_ID = gid; +#else + rb_notimplement(); +#endif + return INT2FIX(gid); +} + +/* [MG] :FIXME: Is this correct? I'm not sure how to phrase this. */ + +/* + * call-seq: + * Process::UID.sid_available? => true or false + * + * Returns +true+ if the current platform has saved user + * ID functionality. + * + */ + +static VALUE +p_uid_have_saved_id() +{ +#if defined(HAVE_SETRESUID) || defined(HAVE_SETEUID) || defined(_POSIX_SAVED_IDS) + return Qtrue; +#else + return Qfalse; +#endif +} + + +#if defined(HAVE_SETRESUID) || defined(HAVE_SETEUID) || defined(_POSIX_SAVED_IDS) +static VALUE +p_uid_sw_ensure(id) + int id; +{ + under_uid_switch = 0; + return rb_seteuid_core(id); +} + + +/* + * call-seq: + * Process::UID.switch => fixnum + * Process::UID.switch {|| block} => object + * + * Switch the effective and real user IDs of the current process. If + * a <em>block</em> is given, the user IDs will be switched back + * after the block is executed. 
Returns the new effective user ID if + * called without a block, and the return value of the block if one + * is given. + * + */ + +static VALUE +p_uid_switch(obj) + VALUE obj; +{ + extern int errno; + int uid, euid; + + check_uid_switch(); + + uid = getuid(); + euid = geteuid(); + + if (uid != euid) { + proc_seteuid(obj, INT2FIX(uid)); + if (rb_block_given_p()) { + under_uid_switch = 1; + return rb_ensure(rb_yield, Qnil, p_uid_sw_ensure, SAVED_USER_ID); + } else { + return INT2FIX(euid); + } + } else if (euid != SAVED_USER_ID) { + proc_seteuid(obj, INT2FIX(SAVED_USER_ID)); + if (rb_block_given_p()) { + under_uid_switch = 1; + return rb_ensure(rb_yield, Qnil, p_uid_sw_ensure, euid); + } else { + return INT2FIX(uid); + } + } else { + errno = EPERM; + rb_sys_fail(0); + } + +#else +static VALUE +p_uid_sw_ensure(obj) + VALUE obj; +{ + under_uid_switch = 0; + return p_uid_exchange(obj); +} + +static VALUE +p_uid_switch(obj) + VALUE obj; +{ + extern int errno; + int uid, euid; + + check_uid_switch(); + + uid = getuid(); + euid = geteuid(); + + if (uid == euid) { + errno = EPERM; + rb_sys_fail(0); + } + p_uid_exchange(obj); + if (rb_block_given_p()) { + under_uid_switch = 1; + return rb_ensure(rb_yield, Qnil, p_uid_sw_ensure, obj); + } else { + return INT2FIX(euid); + } +#endif +} + + +/* [MG] :FIXME: Is this correct? I'm not sure how to phrase this. */ + +/* + * call-seq: + * Process::GID.sid_available? => true or false + * + * Returns +true+ if the current platform has saved group + * ID functionality. 
+ * + */ + +static VALUE +p_gid_have_saved_id() +{ +#if defined(HAVE_SETRESGID) || defined(HAVE_SETEGID) || defined(_POSIX_SAVED_IDS) + return Qtrue; +#else + return Qfalse; +#endif +} + +#if defined(HAVE_SETRESGID) || defined(HAVE_SETEGID) || defined(_POSIX_SAVED_IDS) +static VALUE +p_gid_sw_ensure(id) + int id; +{ + under_gid_switch = 0; + return rb_setegid_core(id); +} + + +/* + * call-seq: + * Process::GID.switch => fixnum + * Process::GID.switch {|| block} => object + * + * Switch the effective and real group IDs of the current process. If + * a <em>block</em> is given, the group IDs will be switched back + * after the block is executed. Returns the new effective group ID if + * called without a block, and the return value of the block if one + * is given. + * + */ + +static VALUE +p_gid_switch(obj) + VALUE obj; +{ + extern int errno; + int gid, egid; + + check_gid_switch(); + + gid = getgid(); + egid = getegid(); + + if (gid != egid) { + proc_setegid(obj, INT2FIX(gid)); + if (rb_block_given_p()) { + under_gid_switch = 1; + return rb_ensure(rb_yield, Qnil, p_gid_sw_ensure, SAVED_GROUP_ID); + } else { + return INT2FIX(egid); + } + } else if (egid != SAVED_GROUP_ID) { + proc_setegid(obj, INT2FIX(SAVED_GROUP_ID)); + if (rb_block_given_p()) { + under_gid_switch = 1; + return rb_ensure(rb_yield, Qnil, p_gid_sw_ensure, egid); + } else { + return INT2FIX(gid); + } + } else { + errno = EPERM; + rb_sys_fail(0); + } +#else +static VALUE +p_gid_sw_ensure(obj) + VALUE obj; +{ + under_gid_switch = 0; + return p_gid_exchange(obj); +} + +static VALUE +p_gid_switch(obj) + VALUE obj; +{ + extern int errno; + int gid, egid; + + check_gid_switch(); + + gid = getgid(); + egid = getegid(); + + if (gid == egid) { + errno = EPERM; + rb_sys_fail(0); + } + p_gid_exchange(obj); + if (rb_block_given_p()) { + under_gid_switch = 1; + return rb_ensure(rb_yield, Qnil, p_gid_sw_ensure, obj); + } else { + return INT2FIX(egid); + } +#endif +} + + +/* + * call-seq: + * Process.times => 
aStructTms + * + * Returns a <code>Tms</code> structure (see <code>Struct::Tms</code> + * on page 388) that contains user and system CPU times for this + * process. + * + * t = Process.times + * [ t.utime, t.stime ] #=> [0.0, 0.02] + */ + +VALUE +rb_proc_times(obj) + VALUE obj; +{ +#if defined(HAVE_TIMES) && !defined(__CHECKER__) +#ifndef HZ +# ifdef CLK_TCK +# define HZ CLK_TCK +# else +# define HZ 60 +# endif +#endif /* HZ */ + struct tms buf; + volatile VALUE utime, stime, cutime, sctime; + + times(&buf); + return rb_struct_new(S_Tms, + utime = rb_float_new((double)buf.tms_utime / HZ), + stime = rb_float_new((double)buf.tms_stime / HZ), + cutime = rb_float_new((double)buf.tms_cutime / HZ), + sctime = rb_float_new((double)buf.tms_cstime / HZ)); +#else + rb_notimplement(); +#endif +} + +VALUE rb_mProcess; +VALUE rb_mProcUID; +VALUE rb_mProcGID; +VALUE rb_mProcID_Syscall; + + +/* + * The <code>Process</code> module is a collection of methods used to + * manipulate processes. + */ + +void +Init_process() +{ + rb_define_virtual_variable("$$", get_pid, 0); + rb_define_readonly_variable("$?", &rb_last_status); + rb_define_global_function("exec", rb_f_exec, -1); + rb_define_global_function("fork", rb_f_fork, 0); + rb_define_global_function("exit!", rb_f_exit_bang, -1); + rb_define_global_function("system", rb_f_system, -1); + rb_define_global_function("spawn", rb_f_spawn, -1); + rb_define_global_function("sleep", rb_f_sleep, -1); + + rb_mProcess = rb_define_module("Process"); + +#if !defined(_WIN32) && !defined(DJGPP) +#ifdef WNOHANG + rb_define_const(rb_mProcess, "WNOHANG", INT2FIX(WNOHANG)); +#else + rb_define_const(rb_mProcess, "WNOHANG", INT2FIX(0)); +#endif +#ifdef WUNTRACED + rb_define_const(rb_mProcess, "WUNTRACED", INT2FIX(WUNTRACED)); +#else + rb_define_const(rb_mProcess, "WUNTRACED", INT2FIX(0)); +#endif +#endif + + rb_define_singleton_method(rb_mProcess, "fork", rb_f_fork, 0); + rb_define_singleton_method(rb_mProcess, "spawn", rb_f_spawn, -1); + 
rb_define_singleton_method(rb_mProcess, "exit!", rb_f_exit_bang, -1); + rb_define_singleton_method(rb_mProcess, "exit", rb_f_exit, -1); /* in eval.c */ + rb_define_singleton_method(rb_mProcess, "abort", rb_f_abort, -1); /* in eval.c */ + + rb_define_module_function(rb_mProcess, "kill", rb_f_kill, -1); /* in signal.c */ + rb_define_module_function(rb_mProcess, "wait", proc_wait, -1); + rb_define_module_function(rb_mProcess, "wait2", proc_wait2, -1); + rb_define_module_function(rb_mProcess, "waitpid", proc_wait, -1); + rb_define_module_function(rb_mProcess, "waitpid2", proc_wait2, -1); + rb_define_module_function(rb_mProcess, "waitall", proc_waitall, 0); + rb_define_module_function(rb_mProcess, "detach", proc_detach, 1); + + rb_cProcStatus = rb_define_class_under(rb_mProcess, "Status", rb_cObject); + rb_undef_method(CLASS_OF(rb_cProcStatus), "new"); + + rb_define_method(rb_cProcStatus, "==", pst_equal, 1); + rb_define_method(rb_cProcStatus, "&", pst_bitand, 1); + rb_define_method(rb_cProcStatus, ">>", pst_rshift, 1); + rb_define_method(rb_cProcStatus, "to_i", pst_to_i, 0); + rb_define_method(rb_cProcStatus, "to_int", pst_to_i, 0); + rb_define_method(rb_cProcStatus, "to_s", pst_to_s, 0); + rb_define_method(rb_cProcStatus, "inspect", pst_inspect, 0); + + rb_define_method(rb_cProcStatus, "pid", pst_pid, 0); + + rb_define_method(rb_cProcStatus, "stopped?", pst_wifstopped, 0); + rb_define_method(rb_cProcStatus, "stopsig", pst_wstopsig, 0); + rb_define_method(rb_cProcStatus, "signaled?", pst_wifsignaled, 0); + rb_define_method(rb_cProcStatus, "termsig", pst_wtermsig, 0); + rb_define_method(rb_cProcStatus, "exited?", pst_wifexited, 0); + rb_define_method(rb_cProcStatus, "exitstatus", pst_wexitstatus, 0); + rb_define_method(rb_cProcStatus, "success?", pst_success_p, 0); + rb_define_method(rb_cProcStatus, "coredump?", pst_wcoredump, 0); + + rb_define_module_function(rb_mProcess, "pid", get_pid, 0); + rb_define_module_function(rb_mProcess, "ppid", get_ppid, 0); + + 
rb_define_module_function(rb_mProcess, "getpgrp", proc_getpgrp, 0); + rb_define_module_function(rb_mProcess, "setpgrp", proc_setpgrp, 0); + rb_define_module_function(rb_mProcess, "getpgid", proc_getpgid, 1); + rb_define_module_function(rb_mProcess, "setpgid", proc_setpgid, 2); + + rb_define_module_function(rb_mProcess, "setsid", proc_setsid, 0); + + rb_define_module_function(rb_mProcess, "getpriority", proc_getpriority, 2); + rb_define_module_function(rb_mProcess, "setpriority", proc_setpriority, 3); + +#ifdef HAVE_GETPRIORITY + rb_define_const(rb_mProcess, "PRIO_PROCESS", INT2FIX(PRIO_PROCESS)); + rb_define_const(rb_mProcess, "PRIO_PGRP", INT2FIX(PRIO_PGRP)); + rb_define_const(rb_mProcess, "PRIO_USER", INT2FIX(PRIO_USER)); +#endif + +#ifdef HAVE_GETRLIMIT + rb_define_module_function(rb_mProcess, "getrlimit", proc_getrlimit, 1); +#endif +#ifdef HAVE_SETRLIMIT + rb_define_module_function(rb_mProcess, "setrlimit", proc_setrlimit, 3); +#endif +#ifdef RLIM2NUM +#ifdef RLIM_INFINITY + rb_define_const(rb_mProcess, "RLIM_INFINITY", RLIM2NUM(RLIM_INFINITY)); +#endif +#ifdef RLIM_SAVED_MAX + rb_define_const(rb_mProcess, "RLIM_SAVED_MAX", RLIM2NUM(RLIM_SAVED_MAX)); +#endif +#ifdef RLIM_SAVED_CUR + rb_define_const(rb_mProcess, "RLIM_SAVED_CUR", RLIM2NUM(RLIM_SAVED_CUR)); +#endif +#ifdef RLIMIT_CORE + rb_define_const(rb_mProcess, "RLIMIT_CORE", INT2FIX(RLIMIT_CORE)); +#endif +#ifdef RLIMIT_CPU + rb_define_const(rb_mProcess, "RLIMIT_CPU", INT2FIX(RLIMIT_CPU)); +#endif +#ifdef RLIMIT_DATA + rb_define_const(rb_mProcess, "RLIMIT_DATA", INT2FIX(RLIMIT_DATA)); +#endif +#ifdef RLIMIT_FSIZE + rb_define_const(rb_mProcess, "RLIMIT_FSIZE", INT2FIX(RLIMIT_FSIZE)); +#endif +#ifdef RLIMIT_NOFILE + rb_define_const(rb_mProcess, "RLIMIT_NOFILE", INT2FIX(RLIMIT_NOFILE)); +#endif +#ifdef RLIMIT_STACK + rb_define_const(rb_mProcess, "RLIMIT_STACK", INT2FIX(RLIMIT_STACK)); +#endif +#ifdef RLIMIT_AS + rb_define_const(rb_mProcess, "RLIMIT_AS", INT2FIX(RLIMIT_AS)); +#endif +#ifdef RLIMIT_MEMLOCK + 
rb_define_const(rb_mProcess, "RLIMIT_MEMLOCK", INT2FIX(RLIMIT_MEMLOCK)); +#endif +#ifdef RLIMIT_NPROC + rb_define_const(rb_mProcess, "RLIMIT_NPROC", INT2FIX(RLIMIT_NPROC)); +#endif +#ifdef RLIMIT_RSS + rb_define_const(rb_mProcess, "RLIMIT_RSS", INT2FIX(RLIMIT_RSS)); +#endif +#ifdef RLIMIT_SBSIZE + rb_define_const(rb_mProcess, "RLIMIT_SBSIZE", INT2FIX(RLIMIT_SBSIZE)); +#endif +#endif + + rb_define_module_function(rb_mProcess, "uid", proc_getuid, 0); + rb_define_module_function(rb_mProcess, "uid=", proc_setuid, 1); + rb_define_module_function(rb_mProcess, "gid", proc_getgid, 0); + rb_define_module_function(rb_mProcess, "gid=", proc_setgid, 1); + rb_define_module_function(rb_mProcess, "euid", proc_geteuid, 0); + rb_define_module_function(rb_mProcess, "euid=", proc_seteuid, 1); + rb_define_module_function(rb_mProcess, "egid", proc_getegid, 0); + rb_define_module_function(rb_mProcess, "egid=", proc_setegid, 1); + rb_define_module_function(rb_mProcess, "initgroups", proc_initgroups, 2); + rb_define_module_function(rb_mProcess, "groups", proc_getgroups, 0); + rb_define_module_function(rb_mProcess, "groups=", proc_setgroups, 1); + rb_define_module_function(rb_mProcess, "maxgroups", proc_getmaxgroups, 0); + rb_define_module_function(rb_mProcess, "maxgroups=", proc_setmaxgroups, 1); + + rb_define_module_function(rb_mProcess, "daemon", proc_daemon, -1); + + rb_define_module_function(rb_mProcess, "times", rb_proc_times, 0); + +#if defined(HAVE_TIMES) || defined(_WIN32) + S_Tms = rb_struct_define("Tms", "utime", "stime", "cutime", "cstime", NULL); +#endif + + SAVED_USER_ID = geteuid(); + SAVED_GROUP_ID = getegid(); + + rb_mProcUID = rb_define_module_under(rb_mProcess, "UID"); + rb_mProcGID = rb_define_module_under(rb_mProcess, "GID"); + + rb_define_module_function(rb_mProcUID, "rid", proc_getuid, 0); + rb_define_module_function(rb_mProcGID, "rid", proc_getgid, 0); + rb_define_module_function(rb_mProcUID, "eid", proc_geteuid, 0); + rb_define_module_function(rb_mProcGID, "eid", 
proc_getegid, 0); + rb_define_module_function(rb_mProcUID, "change_privilege", p_uid_change_privilege, 1); + rb_define_module_function(rb_mProcGID, "change_privilege", p_gid_change_privilege, 1); + rb_define_module_function(rb_mProcUID, "grant_privilege", p_uid_grant_privilege, 1); + rb_define_module_function(rb_mProcGID, "grant_privilege", p_gid_grant_privilege, 1); + rb_define_alias(rb_mProcUID, "eid=", "grant_privilege"); + rb_define_alias(rb_mProcGID, "eid=", "grant_privilege"); + rb_define_module_function(rb_mProcUID, "re_exchange", p_uid_exchange, 0); + rb_define_module_function(rb_mProcGID, "re_exchange", p_gid_exchange, 0); + rb_define_module_function(rb_mProcUID, "re_exchangeable?", p_uid_exchangeable, 0); + rb_define_module_function(rb_mProcGID, "re_exchangeable?", p_gid_exchangeable, 0); + rb_define_module_function(rb_mProcUID, "sid_available?", p_uid_have_saved_id, 0); + rb_define_module_function(rb_mProcGID, "sid_available?", p_gid_have_saved_id, 0); + rb_define_module_function(rb_mProcUID, "switch", p_uid_switch, 0); + rb_define_module_function(rb_mProcGID, "switch", p_gid_switch, 0); + + rb_mProcID_Syscall = rb_define_module_under(rb_mProcess, "Sys"); + + rb_define_module_function(rb_mProcID_Syscall, "getuid", proc_getuid, 0); + rb_define_module_function(rb_mProcID_Syscall, "geteuid", proc_geteuid, 0); + rb_define_module_function(rb_mProcID_Syscall, "getgid", proc_getgid, 0); + rb_define_module_function(rb_mProcID_Syscall, "getegid", proc_getegid, 0); + + rb_define_module_function(rb_mProcID_Syscall, "setuid", p_sys_setuid, 1); + rb_define_module_function(rb_mProcID_Syscall, "setgid", p_sys_setgid, 1); + + rb_define_module_function(rb_mProcID_Syscall, "setruid", p_sys_setruid, 1); + rb_define_module_function(rb_mProcID_Syscall, "setrgid", p_sys_setrgid, 1); + + rb_define_module_function(rb_mProcID_Syscall, "seteuid", p_sys_seteuid, 1); + rb_define_module_function(rb_mProcID_Syscall, "setegid", p_sys_setegid, 1); + + 
rb_define_module_function(rb_mProcID_Syscall, "setreuid", p_sys_setreuid, 2); + rb_define_module_function(rb_mProcID_Syscall, "setregid", p_sys_setregid, 2); + + rb_define_module_function(rb_mProcID_Syscall, "setresuid", p_sys_setresuid, 3); + rb_define_module_function(rb_mProcID_Syscall, "setresgid", p_sys_setresgid, 3); + rb_define_module_function(rb_mProcID_Syscall, "issetugid", p_sys_issetugid, 0); +} +/********************************************************************** + + random.c - + + $Author: nobu $ + $Date: 2005/02/12 06:07:34 $ + created at: Fri Dec 24 16:39:21 JST 1993 + + Copyright (C) 1993-2003 Yukihiro Matsumoto + +**********************************************************************/ + +/* +This is based on trimmed version of MT19937. To get the original version, +contact <http://www.math.keio.ac.jp/~matumoto/emt.html>. + +The original copyright notice follows. + + A C-program for MT19937, with initialization improved 2002/2/10. + Coded by Takuji Nishimura and Makoto Matsumoto. + This is a faster version by taking Shawn Cokus's optimization, + Matthe Bellew's simplification, Isaku Wada's real version. + + Before using, initialize the state by using init_genrand(seed) + or init_by_array(init_key, key_length). + + Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. The names of its contributors may not be used to endorse or promote + products derived from this software without specific prior written + permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + + Any feedback is very welcome. + http://www.math.keio.ac.jp/matumoto/emt.html + email: matumoto@math.keio.ac.jp +*/ + +/* Period parameters */ +#define N 624 +#define M 397 +#define MATRIX_A 0x9908b0dfUL /* constant vector a */ +#define UMASK 0x80000000UL /* most significant w-r bits */ +#define LMASK 0x7fffffffUL /* least significant r bits */ +#define MIXBITS(u,v) ( ((u) & UMASK) | ((v) & LMASK) ) +#define TWIST(u,v) ((MIXBITS(u,v) >> 1) ^ ((v)&1UL ? MATRIX_A : 0UL)) + +static unsigned long state[N]; /* the array for the state vector */ +static int left = 1; +static int initf = 0; +static unsigned long *next; + +/* initializes state[N] with a seed */ +static void +init_genrand(s) + unsigned long s; +{ + int j; + state[0]= s & 0xffffffffUL; + for (j=1; j<N; j++) { + state[j] = (1812433253UL * (state[j-1] ^ (state[j-1] >> 30)) + j); + /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */ + /* In the previous versions, MSBs of the seed affect */ + /* only MSBs of the array state[]. 
*/ + /* 2002/01/09 modified by Makoto Matsumoto */ + state[j] &= 0xffffffffUL; /* for >32 bit machines */ + } + left = 1; initf = 1; +} + +/* initialize by an array with array-length */ +/* init_key is the array for initializing keys */ +/* key_length is its length */ +/* slight change for C++, 2004/2/26 */ +static void +init_by_array(unsigned long init_key[], int key_length) +{ + int i, j, k; + init_genrand(19650218UL); + i=1; j=0; + k = (N>key_length ? N : key_length); + for (; k; k--) { + state[i] = (state[i] ^ ((state[i-1] ^ (state[i-1] >> 30)) * 1664525UL)) + + init_key[j] + j; /* non linear */ + state[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */ + i++; j++; + if (i>=N) { state[0] = state[N-1]; i=1; } + if (j>=key_length) j=0; + } + for (k=N-1; k; k--) { + state[i] = (state[i] ^ ((state[i-1] ^ (state[i-1] >> 30)) * 1566083941UL)) + - i; /* non linear */ + state[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */ + i++; + if (i>=N) { state[0] = state[N-1]; i=1; } + } + + state[0] = 0x80000000UL; /* MSB is 1; assuring non-zero initial array */ + left = 1; initf = 1; +} + +static void +next_state() +{ + unsigned long *p=state; + int j; + + /* if init_genrand() has not been called, */ + /* a default initial seed is used */ + if (initf==0) init_genrand(5489UL); + + left = N; + next = state; + + for (j=N-M+1; --j; p++) + *p = p[M] ^ TWIST(p[0], p[1]); + + for (j=M; --j; p++) + *p = p[M-N] ^ TWIST(p[0], p[1]); + + *p = p[M-N] ^ TWIST(p[0], state[0]); +} + +/* generates a random number on [0,0xffffffff]-interval */ +static unsigned long +genrand_int32(void) +{ + unsigned long y; + + if (--left == 0) next_state(); + y = *next++; + + /* Tempering */ + y ^= (y >> 11); + y ^= (y << 7) & 0x9d2c5680UL; + y ^= (y << 15) & 0xefc60000UL; + y ^= (y >> 18); + + return y; +} + +/* generates a random number on [0,1) with 53-bit resolution*/ +static double +genrand_real(void) +{ + unsigned long a=genrand_int32()>>5, b=genrand_int32()>>6; + 
return(a*67108864.0+b)*(1.0/9007199254740992.0); +} +/* These real versions are due to Isaku Wada, 2002/01/09 added */ + +#undef N +#undef M + +/* These real versions are due to Isaku Wada, 2002/01/09 added */ + +#include "ruby.h" + +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#include <time.h> +#include <sys/types.h> +#include <sys/stat.h> +#ifdef HAVE_FCNTL_H +#include <fcntl.h> +#endif + +static int first = 1; +static VALUE saved_seed = INT2FIX(0); + +static VALUE +rand_init(vseed) + VALUE vseed; +{ + volatile VALUE seed; + VALUE old; + long len; + unsigned long *buf; + + seed = rb_to_int(vseed); + switch (TYPE(seed)) { + case T_FIXNUM: + len = sizeof(VALUE); + break; + case T_BIGNUM: + len = RBIGNUM(seed)->len * SIZEOF_BDIGITS; + if (len == 0) + len = 4; + break; + default: + rb_raise(rb_eTypeError, "failed to convert %s into Integer", + rb_obj_classname(vseed)); + } + len = (len + 3) / 4; /* number of 32bit words */ + buf = ALLOC_N(unsigned long, len); /* allocate longs for init_by_array */ + memset(buf, 0, len * sizeof(long)); + if (FIXNUM_P(seed)) { + buf[0] = FIX2ULONG(seed) & 0xffffffff; +#if SIZEOF_LONG > 4 + buf[1] = FIX2ULONG(seed) >> 32; +#endif + } + else { + int i, j; + for (i = RBIGNUM(seed)->len-1; 0 <= i; i--) { + j = i * SIZEOF_BDIGITS / 4; +#if SIZEOF_BDIGITS < 4 + buf[j] <<= SIZEOF_BDIGITS * 8; +#endif + buf[j] |= ((BDIGIT *)RBIGNUM(seed)->digits)[i]; + } + } + while (1 < len && buf[len-1] == 0) { + len--; + } + if (len <= 1) { + init_genrand(buf[0]); + } + else { + if (buf[len-1] == 1) /* remove leading-zero-guard */ + len--; + init_by_array(buf, len); + } + first = 0; + old = saved_seed; + saved_seed = seed; + free(buf); + return old; +} + +static VALUE +random_seed() +{ + static int n = 0; + struct timeval tv; + int fd; + struct stat statbuf; + + int seed_len; + BDIGIT *digits; + unsigned long *seed; + NEWOBJ(big, struct RBignum); + OBJSETUP(big, rb_cBignum, T_BIGNUM); + + seed_len = 4 * sizeof(long); + big->sign = 1; + big->len = 
seed_len / SIZEOF_BDIGITS + 1; + digits = big->digits = ALLOC_N(BDIGIT, big->len); + seed = (unsigned long *)big->digits; + + memset(digits, 0, big->len * SIZEOF_BDIGITS); + +#ifdef S_ISCHR + if ((fd = open("/dev/urandom", O_RDONLY +#ifdef O_NONBLOCK + |O_NONBLOCK +#endif +#ifdef O_NOCTTY + |O_NOCTTY +#endif +#ifdef O_NOFOLLOW + |O_NOFOLLOW +#endif + )) >= 0) { + if (fstat(fd, &statbuf) == 0 && S_ISCHR(statbuf.st_mode)) { + read(fd, seed, seed_len); + } + close(fd); + } +#endif + + gettimeofday(&tv, 0); + seed[0] ^= tv.tv_usec; + seed[1] ^= tv.tv_sec; + seed[2] ^= getpid() ^ (n++ << 16); + seed[3] ^= (unsigned long)&seed; + + /* set leading-zero-guard if need. */ + digits[big->len-1] = digits[big->len-2] <= 1 ? 1 : 0; + + return rb_big_norm((VALUE)big); +} + +/* + * call-seq: + * srand(number=0) => old_seed + * + * Seeds the pseudorandom number generator to the value of + * <i>number</i>.<code>to_i.abs</code>. If <i>number</i> is omitted + * or zero, seeds the generator using a combination of the time, the + * process id, and a sequence number. (This is also the behavior if + * <code>Kernel::rand</code> is called without previously calling + * <code>srand</code>, but without the sequence.) By setting the seed + * to a known value, scripts can be made deterministic during testing. + * The previous seed value is returned. Also see <code>Kernel::rand</code>. 
+ */ + +static VALUE +rb_f_srand(argc, argv, obj) + int argc; + VALUE *argv; + VALUE obj; +{ + VALUE seed, old; + + rb_secure(4); + if (rb_scan_args(argc, argv, "01", &seed) == 0) { + seed = random_seed(); + } + old = rand_init(seed); + + return old; +} + +static unsigned long +make_mask(unsigned long x) +{ + x = x | x >> 1; + x = x | x >> 2; + x = x | x >> 4; + x = x | x >> 8; + x = x | x >> 16; +#if 4 < SIZEOF_LONG + x = x | x >> 32; +#endif + return x; +} + +static unsigned long +limited_rand(unsigned long limit) +{ + unsigned long mask = make_mask(limit); + int i; + unsigned long val; + + retry: + val = 0; + for (i = SIZEOF_LONG/4-1; 0 <= i; i--) { + if (mask >> (i * 32)) { + val |= genrand_int32() << (i * 32); + val &= mask; + if (limit < val) + goto retry; + } + } + return val; +} + +static VALUE +limited_big_rand(struct RBignum *limit) +{ + unsigned long mask, lim, rnd; + struct RBignum *val; + int i, len, boundary; + + len = (limit->len * SIZEOF_BDIGITS + 3) / 4; + val = (struct RBignum *)rb_big_clone((VALUE)limit); + val->sign = 1; +#if SIZEOF_BDIGITS == 2 +# define BIG_GET32(big,i) (((BDIGIT *)(big)->digits)[(i)*2] | \ + ((i)*2+1 < (big)->len ? (((BDIGIT *)(big)->digits)[(i)*2+1] << 16) \ + : 0)) +# define BIG_SET32(big,i,d) ((((BDIGIT *)(big)->digits)[(i)*2] = (d) & 0xffff), \ + ((i)*2+1 < (big)->len ? (((BDIGIT *)(big)->digits)[(i)*2+1] = (d) >> 16) \ + : 0)) +#else + /* SIZEOF_BDIGITS == 4 */ +# define BIG_GET32(big,i) (((BDIGIT *)(big)->digits)[i]) +# define BIG_SET32(big,i,d) (((BDIGIT *)(big)->digits)[i] = (d)) +#endif + retry: + mask = 0; + boundary = 1; + for (i = len-1; 0 <= i; i--) { + lim = BIG_GET32(limit, i); + mask = mask ? 
0xffffffff : make_mask(lim); + if (mask) { + rnd = genrand_int32() & mask; + if (boundary) { + if (lim < rnd) + goto retry; + if (rnd < lim) + boundary = 0; + } + } + else { + rnd = 0; + } + BIG_SET32(val, i, rnd); + } + return rb_big_norm((VALUE)val); +} + +/* + * call-seq: + * rand(max=0) => number + * + * Converts <i>max</i> to an integer using max1 = + * max<code>.to_i.abs</code>. If the result is zero, returns a + * pseudorandom floating point number greater than or equal to 0.0 and + * less than 1.0. Otherwise, returns a pseudorandom integer greater + * than or equal to zero and less than max1. <code>Kernel::srand</code> + * may be used to ensure repeatable sequences of random numbers between + * different runs of the program. Ruby currently uses a modified + * Mersenne Twister with a period of 219937-1. + * + * srand 1234 #=> 0 + * [ rand, rand ] #=> [0.191519450163469, 0.49766366626136] + * [ rand(10), rand(1000) ] #=> [6, 817] + * srand 1234 #=> 1234 + * [ rand, rand ] #=> [0.191519450163469, 0.49766366626136] + */ + +static VALUE +rb_f_rand(argc, argv, obj) + int argc; + VALUE *argv; + VALUE obj; +{ + VALUE vmax; + long val, max; + + rb_scan_args(argc, argv, "01", &vmax); + if (first) { + rand_init(random_seed()); + } + switch (TYPE(vmax)) { + case T_FLOAT: + if (RFLOAT(vmax)->value <= LONG_MAX && RFLOAT(vmax)->value >= LONG_MIN) { + max = (long)RFLOAT(vmax)->value; + break; + } + if (RFLOAT(vmax)->value < 0) + vmax = rb_dbl2big(-RFLOAT(vmax)->value); + else + vmax = rb_dbl2big(RFLOAT(vmax)->value); + /* fall through */ + case T_BIGNUM: + bignum: + { + struct RBignum *limit = (struct RBignum *)vmax; + if (!limit->sign) { + limit = (struct RBignum *)rb_big_clone(vmax); + limit->sign = 1; + } + limit = (struct RBignum *)rb_big_minus((VALUE)limit, INT2FIX(1)); + if (FIXNUM_P((VALUE)limit)) { + if (FIX2LONG((VALUE)limit) == -1) + return rb_float_new(genrand_real()); + return LONG2NUM(limited_rand(FIX2LONG((VALUE)limit))); + } + return limited_big_rand(limit); 
+ } + case T_NIL: + max = 0; + break; + default: + vmax = rb_Integer(vmax); + if (TYPE(vmax) == T_BIGNUM) goto bignum; + case T_FIXNUM: + max = FIX2LONG(vmax); + break; + } + + if (max == 0) { + return rb_float_new(genrand_real()); + } + if (max < 0) max = -max; + val = limited_rand(max-1); + return LONG2NUM(val); +} + +void +Init_Random() +{ + rb_define_global_function("srand", rb_f_srand, -1); + rb_define_global_function("rand", rb_f_rand, -1); + rb_global_variable(&saved_seed); +} +/********************************************************************** + + range.c - + + $Author: matz $ + $Date: 2005/03/04 06:47:41 $ + created at: Thu Aug 19 17:46:47 JST 1993 + + Copyright (C) 1993-2003 Yukihiro Matsumoto + +**********************************************************************/ + +#include "ruby.h" + +VALUE rb_cRange; +static ID id_cmp, id_succ, id_beg, id_end, id_excl; + +#define EXCL(r) RTEST(rb_ivar_get((r), id_excl)) +#define SET_EXCL(r,v) rb_ivar_set((r), id_excl, (v) ? Qtrue : Qfalse) + +static VALUE +range_failed() +{ + rb_raise(rb_eArgError, "bad value for range"); + return Qnil; /* dummy */ +} + +static VALUE +range_check(args) + VALUE *args; +{ + VALUE v; + + v = rb_funcall(args[0], id_cmp, 1, args[1]); + if (NIL_P(v)) range_failed(); + return Qnil; +} + +static void +range_init(range, beg, end, exclude_end) + VALUE range, beg, end; + int exclude_end; +{ + VALUE args[2]; + + args[0] = beg; + args[1] = end; + + if (!FIXNUM_P(beg) || !FIXNUM_P(end)) { + rb_rescue(range_check, (VALUE)args, range_failed, 0); + } + + SET_EXCL(range, exclude_end); + rb_ivar_set(range, id_beg, beg); + rb_ivar_set(range, id_end, end); +} + +VALUE +rb_range_new(beg, end, exclude_end) + VALUE beg, end; + int exclude_end; +{ + VALUE range = rb_obj_alloc(rb_cRange); + + range_init(range, beg, end, exclude_end); + return range; +} + +/* + * call-seq: + * Range.new(start, end, exclusive=false) => range + * + * Constructs a range using the given <i>start</i> and <i>end</i>. 
If the third + * parameter is omitted or is <code>false</code>, the <i>range</i> will include + * the end object; otherwise, it will be excluded. + */ + +static VALUE +range_initialize(argc, argv, range) + int argc; + VALUE *argv; + VALUE range; +{ + VALUE beg, end, flags; + + rb_scan_args(argc, argv, "21", &beg, &end, &flags); + /* Ranges are immutable, so that they should be initialized only once. */ + if (rb_ivar_defined(range, id_beg)) { + rb_name_error(rb_intern("initialize"), "`initialize' called twice"); + } + range_init(range, beg, end, RTEST(flags)); + return Qnil; +} + + +/* + * call-seq: + * rng.exclude_end? => true or false + * + * Returns <code>true</code> if <i>rng</i> excludes its end value. + */ + +static VALUE +range_exclude_end_p(range) + VALUE range; +{ + return EXCL(range) ? Qtrue : Qfalse; +} + + +/* + * call-seq: + * rng == obj => true or false + * + * Returns <code>true</code> only if <i>obj</i> is a Range, has equivalent + * beginning and end items (by comparing them with <code>==</code>), and has + * the same #exclude_end? setting as <i>rng</t>. 
+ * + * (0..2) == (0..2) #=> true + * (0..2) == Range.new(0,2) #=> true + * (0..2) == (0...2) #=> false + * + */ + +static VALUE +range_eq(range, obj) + VALUE range, obj; +{ + if (range == obj) return Qtrue; + if (!rb_obj_is_instance_of(obj, rb_obj_class(range))) + return Qfalse; + + if (!rb_equal(rb_ivar_get(range, id_beg), rb_ivar_get(obj, id_beg))) + return Qfalse; + if (!rb_equal(rb_ivar_get(range, id_end), rb_ivar_get(obj, id_end))) + return Qfalse; + + if (EXCL(range) != EXCL(obj)) return Qfalse; + + return Qtrue; +} + +static int +r_lt(a, b) + VALUE a, b; +{ + VALUE r = rb_funcall(a, id_cmp, 1, b); + + if (NIL_P(r)) return Qfalse; + if (rb_cmpint(r, a, b) < 0) return Qtrue; + return Qfalse; +} + +static int +r_le(a, b) + VALUE a, b; +{ + int c; + VALUE r = rb_funcall(a, id_cmp, 1, b); + + if (NIL_P(r)) return Qfalse; + c = rb_cmpint(r, a, b); + if (c == 0) return INT2FIX(0); + if (c < 0) return Qtrue; + return Qfalse; +} + + +/* + * call-seq: + * rng.eql?(obj) => true or false + * + * Returns <code>true</code> only if <i>obj</i> is a Range, has equivalent + * beginning and end items (by comparing them with #eql?), and has the same + * #exclude_end? setting as <i>rng</i>. + * + * (0..2) == (0..2) #=> true + * (0..2) == Range.new(0,2) #=> true + * (0..2) == (0...2) #=> false + * + */ + +static VALUE +range_eql(range, obj) + VALUE range, obj; +{ + if (range == obj) return Qtrue; + if (!rb_obj_is_instance_of(obj, rb_obj_class(range))) + return Qfalse; + + if (!rb_eql(rb_ivar_get(range, id_beg), rb_ivar_get(obj, id_beg))) + return Qfalse; + if (!rb_eql(rb_ivar_get(range, id_end), rb_ivar_get(obj, id_end))) + return Qfalse; + + if (EXCL(range) != EXCL(obj)) return Qfalse; + + return Qtrue; +} + +/* + * call-seq: + * rng.hash => fixnum + * + * Generate a hash value such that two ranges with the same start and + * end points, and the same value for the "exclude end" flag, generate + * the same hash value. 
+ */ + +static VALUE +range_hash(range) + VALUE range; +{ + long hash = EXCL(range); + VALUE v; + + v = rb_hash(rb_ivar_get(range, id_beg)); + hash ^= v << 1; + v = rb_hash(rb_ivar_get(range, id_end)); + hash ^= v << 9; + hash ^= EXCL(range) << 24; + + return LONG2FIX(hash); +} + +static VALUE +str_step(args) + VALUE *args; +{ + return rb_str_upto(args[0], args[1], EXCL(args[2])); +} + +static void +range_each_func(range, func, v, e, arg) + VALUE range; + void (*func) _((VALUE, void*)); + VALUE v, e; + void *arg; +{ + int c; + + if (EXCL(range)) { + while (r_lt(v, e)) { + (*func)(v, arg); + v = rb_funcall(v, id_succ, 0, 0); + } + } + else { + while (RTEST(c = r_le(v, e))) { + (*func)(v, arg); + if (c == INT2FIX(0)) break; + v = rb_funcall(v, id_succ, 0, 0); + } + } +} + +static VALUE +step_i(i, iter) + VALUE i; + long *iter; +{ + iter[0]--; + if (iter[0] == 0) { + rb_yield(i); + iter[0] = iter[1]; + } + return Qnil; +} + +/* + * call-seq: + * rng.step(n=1) {| obj | block } => rng + * + * Iterates over <i>rng</i>, passing each <i>n</i>th element to the block. If + * the range contains numbers or strings, natural ordering is used. Otherwise + * <code>step</code> invokes <code>succ</code> to iterate through range + * elements. The following code uses class <code>Xs</code>, which is defined + * in the class-level documentation. 
+ * + * range = Xs.new(1)..Xs.new(10) + * range.step(2) {|x| puts x} + * range.step(3) {|x| puts x} + * + * <em>produces:</em> + * + * 1 x + * 3 xxx + * 5 xxxxx + * 7 xxxxxxx + * 9 xxxxxxxxx + * 1 x + * 4 xxxx + * 7 xxxxxxx + * 10 xxxxxxxxxx + */ + + +static VALUE +range_step(argc, argv, range) + int argc; + VALUE *argv; + VALUE range; +{ + VALUE b, e, step; + long unit; + + b = rb_ivar_get(range, id_beg); + e = rb_ivar_get(range, id_end); + if (rb_scan_args(argc, argv, "01", &step) == 0) { + step = INT2FIX(1); + } + + unit = NUM2LONG(step); + if (unit < 0) { + rb_raise(rb_eArgError, "step can't be negative"); + } + if (FIXNUM_P(b) && FIXNUM_P(e)) { /* fixnums are special */ + long end = FIX2LONG(e); + long i; + + if (unit == 0) rb_raise(rb_eArgError, "step can't be 0"); + if (!EXCL(range)) end += 1; + for (i=FIX2LONG(b); i<end; i+=unit) { + rb_yield(LONG2NUM(i)); + } + } + else { + VALUE tmp = rb_check_string_type(b); + + if (!NIL_P(tmp)) { + VALUE args[5]; + long iter[2]; + + b = tmp; + if (unit == 0) rb_raise(rb_eArgError, "step can't be 0"); + args[0] = b; args[1] = e; args[2] = range; + iter[0] = 1; iter[1] = unit; + rb_iterate((VALUE(*)_((VALUE)))str_step, (VALUE)args, step_i, + (VALUE)iter); + } + else if (rb_obj_is_kind_of(b, rb_cNumeric)) { + ID c = rb_intern(EXCL(range) ? 
"<" : "<="); + + if (rb_equal(step, INT2FIX(0))) rb_raise(rb_eArgError, "step can't be 0"); + while (RTEST(rb_funcall(b, c, 1, e))) { + rb_yield(b); + b = rb_funcall(b, '+', 1, step); + } + } + else { + long args[2]; + + if (unit == 0) rb_raise(rb_eArgError, "step can't be 0"); + if (!rb_respond_to(b, id_succ)) { + rb_raise(rb_eTypeError, "can't iterate from %s", + rb_obj_classname(b)); + } + + args[0] = 1; + args[1] = unit; + range_each_func(range, step_i, b, e, args); + } + } + return range; +} + +static void +each_i(v, arg) + VALUE v; + void *arg; +{ + rb_yield(v); +} + +/* + * call-seq: + * rng.each {| i | block } => rng + * + * Iterates over the elements <i>rng</i>, passing each in turn to the + * block. You can only iterate if the start object of the range + * supports the +succ+ method (which means that you can't iterate over + * ranges of +Float+ objects). + * + * (10..15).each do |n| + * print n, ' ' + * end + * + * <em>produces:</em> + * + * 10 11 12 13 14 15 + */ + +static VALUE +range_each(range) + VALUE range; +{ + VALUE beg, end; + + beg = rb_ivar_get(range, id_beg); + end = rb_ivar_get(range, id_end); + + if (!rb_respond_to(beg, id_succ)) { + rb_raise(rb_eTypeError, "can't iterate from %s", + rb_obj_classname(beg)); + } + if (FIXNUM_P(beg) && FIXNUM_P(end)) { /* fixnums are special */ + long lim = FIX2LONG(end); + long i; + + if (!EXCL(range)) lim += 1; + for (i=FIX2LONG(beg); i<lim; i++) { + rb_yield(LONG2NUM(i)); + } + } + else if (TYPE(beg) == T_STRING) { + VALUE args[5]; + long iter[2]; + + args[0] = beg; args[1] = end; args[2] = range; + iter[0] = 1; iter[1] = 1; + rb_iterate((VALUE(*)_((VALUE)))str_step, (VALUE)args, step_i, + (VALUE)iter); + } + else { + range_each_func(range, each_i, beg, end, NULL); + } + return range; +} + +/* + * call-seq: + * rng.first => obj + * rng.begin => obj + * + * Returns the first object in <i>rng</i>. 
+ */ + +static VALUE +range_first(range) + VALUE range; +{ + return rb_ivar_get(range, id_beg); +} + + +/* + * call-seq: + * rng.end => obj + * rng.last => obj + * + * Returns the object that defines the end of <i>rng</i>. + * + * (1..10).end #=> 10 + * (1...10).end #=> 10 + */ + + +static VALUE +range_last(range) + VALUE range; +{ + return rb_ivar_get(range, id_end); +} + +VALUE +rb_range_beg_len(range, begp, lenp, len, err) + VALUE range; + long *begp, *lenp; + long len; + int err; +{ + long beg, end, b, e; + + if (!rb_obj_is_kind_of(range, rb_cRange)) return Qfalse; + + beg = b = NUM2LONG(rb_ivar_get(range, id_beg)); + end = e = NUM2LONG(rb_ivar_get(range, id_end)); + + if (beg < 0) { + beg += len; + if (beg < 0) goto out_of_range; + } + if (err == 0 || err == 2) { + if (beg > len) goto out_of_range; + if (end > len) end = len; + } + if (end < 0) end += len; + if (!EXCL(range)) end++; /* include end point */ + len = end - beg; + if (len < 0) len = 0; + + *begp = beg; + *lenp = len; + return Qtrue; + + out_of_range: + if (err) { + rb_raise(rb_eRangeError, "%ld..%s%ld out of range", + b, EXCL(range)? "." : "", e); + } + return Qnil; +} + +/* + * call-seq: + * rng.to_s => string + * + * Convert this range object to a printable form. + */ + +static VALUE +range_to_s(range) + VALUE range; +{ + VALUE str, str2; + + str = rb_obj_as_string(rb_ivar_get(range, id_beg)); + str2 = rb_obj_as_string(rb_ivar_get(range, id_end)); + str = rb_str_dup(str); + rb_str_cat(str, "...", EXCL(range)?3:2); + rb_str_append(str, str2); + OBJ_INFECT(str, str2); + + return str; +} + +/* + * call-seq: + * rng.inspect => string + * + * Convert this range object to a printable form (using + * <code>inspect</code> to convert the start and end + * objects). 
+ */ + + +static VALUE +range_inspect(range) + VALUE range; +{ + VALUE str, str2; + + str = rb_inspect(rb_ivar_get(range, id_beg)); + str2 = rb_inspect(rb_ivar_get(range, id_end)); + str = rb_str_dup(str); + rb_str_cat(str, "...", EXCL(range)?3:2); + rb_str_append(str, str2); + OBJ_INFECT(str, str2); + + return str; +} + +/* + * call-seq: + * rng === obj => true or false + * rng.member?(val) => true or false + * rng.include?(val) => true or false + * + * Returns <code>true</code> if <i>obj</i> is an element of + * <i>rng</i>, <code>false</code> otherwise. Conveniently, + * <code>===</code> is the comparison operator used by + * <code>case</code> statements. + * + * case 79 + * when 1..50 then print "low\n" + * when 51..75 then print "medium\n" + * when 76..100 then print "high\n" + * end + * + * <em>produces:</em> + * + * high + */ + +static VALUE +range_include(range, val) + VALUE range, val; +{ + VALUE beg, end; + + beg = rb_ivar_get(range, id_beg); + end = rb_ivar_get(range, id_end); + if (r_le(beg, val)) { + if (EXCL(range)) { + if (r_lt(val, end)) return Qtrue; + } + else { + if (r_le(val, end)) return Qtrue; + } + } + return Qfalse; +} + + +/* A <code>Range</code> represents an interval---a set of values with a + * start and an end. Ranges may be constructed using the + * <em>s</em><code>..</code><em>e</em> and + * <em>s</em><code>...</code><em>e</em> literals, or with + * <code>Range::new</code>. Ranges constructed using <code>..</code> + * run from the start to the end inclusively. Those created using + * <code>...</code> exclude the end value. When used as an iterator, + * ranges return each value in the sequence. 
+ * + * (-1..-5).to_a #=> [] + * (-5..-1).to_a #=> [-5, -4, -3, -2, -1] + * ('a'..'e').to_a #=> ["a", "b", "c", "d", "e"] + * ('a'...'e').to_a #=> ["a", "b", "c", "d"] + * + * Ranges can be constructed using objects of any type, as long as the + * objects can be compared using their <code><=></code> operator and + * they support the <code>succ</code> method to return the next object + * in sequence. + * + * class Xs # represent a string of 'x's + * include Comparable + * attr :length + * def initialize(n) + * @length = n + * end + * def succ + * Xs.new(@length + 1) + * end + * def <=>(other) + * @length <=> other.length + * end + * def to_s + * sprintf "%2d #{inspect}", @length + * end + * def inspect + * 'x' * @length + * end + * end + * + * r = Xs.new(3)..Xs.new(6) #=> xxx..xxxxxx + * r.to_a #=> [xxx, xxxx, xxxxx, xxxxxx] + * r.member?(Xs.new(5)) #=> true + * + * In the previous code example, class <code>Xs</code> includes the + * <code>Comparable</code> module. This is because + * <code>Enumerable#member?</code> checks for equality using + * <code>==</code>. Including <code>Comparable</code> ensures that the + * <code>==</code> method is defined in terms of the <code><=></code> + * method implemented in <code>Xs</code>. 
+ * + */ + +void +Init_Range() +{ + rb_cRange = rb_define_class("Range", rb_cObject); + rb_include_module(rb_cRange, rb_mEnumerable); + rb_define_method(rb_cRange, "initialize", range_initialize, -1); + rb_define_method(rb_cRange, "==", range_eq, 1); + rb_define_method(rb_cRange, "===", range_include, 1); + rb_define_method(rb_cRange, "eql?", range_eql, 1); + rb_define_method(rb_cRange, "hash", range_hash, 0); + rb_define_method(rb_cRange, "each", range_each, 0); + rb_define_method(rb_cRange, "step", range_step, -1); + rb_define_method(rb_cRange, "first", range_first, 0); + rb_define_method(rb_cRange, "last", range_last, 0); + rb_define_method(rb_cRange, "begin", range_first, 0); + rb_define_method(rb_cRange, "end", range_last, 0); + rb_define_method(rb_cRange, "to_s", range_to_s, 0); + rb_define_method(rb_cRange, "inspect", range_inspect, 0); + + rb_define_method(rb_cRange, "exclude_end?", range_exclude_end_p, 0); + + rb_define_method(rb_cRange, "member?", range_include, 1); + rb_define_method(rb_cRange, "include?", range_include, 1); + + id_cmp = rb_intern("<=>"); + id_succ = rb_intern("succ"); + id_beg = rb_intern("begin"); + id_end = rb_intern("end"); + id_excl = rb_intern("excl"); +} +/********************************************************************** + + re.c - + + $Author: nobu $ + created at: Mon Aug 9 18:24:49 JST 1993 + + Copyright (C) 1993-2003 Yukihiro Matsumoto + +**********************************************************************/ + +#include "ruby.h" +#include "re.h" +#include "regint.h" +#include <ctype.h> + +#define MBCTYPE_ASCII 0 +#define MBCTYPE_EUC 1 +#define MBCTYPE_SJIS 2 +#define MBCTYPE_UTF8 3 + +static VALUE rb_eRegexpError; + +#define BEG(no) regs->beg[no] +#define END(no) regs->end[no] + +#if 'a' == 97 /* it's ascii */ +static const char casetable[] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', 
'\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + /* ' ' '!' '"' '#' '$' '%' '&' ''' */ + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + /* '(' ')' '*' '+' ',' '-' '.' '/' */ + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + /* '0' '1' '2' '3' '4' '5' '6' '7' */ + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + /* '8' '9' ':' ';' '<' '=' '>' '?' */ + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + /* '@' 'A' 'B' 'C' 'D' 'E' 'F' 'G' */ + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + /* 'H' 'I' 'J' 'K' 'L' 'M' 'N' 'O' */ + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + /* 'P' 'Q' 'R' 'S' 'T' 'U' 'V' 'W' */ + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + /* 'X' 'Y' 'Z' '[' '\' ']' '^' '_' */ + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + /* '`' 'a' 'b' 'c' 'd' 'e' 'f' 'g' */ + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + /* 'h' 'i' 'j' 'k' 'l' 'm' 'n' 'o' */ + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + /* 'p' 'q' 'r' 's' 't' 'u' 'v' 'w' */ + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + /* 'x' 'y' 'z' '{' '|' '}' '~' */ + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', + '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', + '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', + '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', + '\310', '\311', '\312', '\313', '\314', 
'\315', '\316', '\317', + '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', + '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377', +}; +#else +# error >>> "You lose. You will need a translation table for your character set." <<< +#endif + +int +rb_memcicmp(p1, p2, len) + char *p1, *p2; + long len; +{ + int tmp; + + while (len--) { + if (tmp = casetable[(unsigned)*p1++] - casetable[(unsigned)*p2++]) + return tmp; + } + return 0; +} + +int +rb_memcmp(p1, p2, len) + char *p1, *p2; + long len; +{ + if (!ruby_ignorecase) { + return memcmp(p1, p2, len); + } + return rb_memcicmp(p1, p2, len); +} + +long +rb_memsearch(x0, m, y0, n) + char *x0, *y0; + long m, n; +{ + unsigned char *x = (unsigned char *)x0, *y = (unsigned char *)y0; + unsigned char *s, *e; + long i; + int d; + unsigned long hx, hy; + +#define KR_REHASH(a, b, h) (((h) << 1) - ((long)(a)<<d) + (b)) + + if (m > n) return -1; + s = y; e = s + n - m; + + /* Preprocessing */ + /* computes d = 2^(m-1) with + the left-shift operator */ + d = sizeof(hx) * CHAR_BIT - 1; + if (d > m) d = m; + + if (ruby_ignorecase) { + if (n == m) { + return rb_memcicmp(x, s, m) == 0 ? 0 : -1; + } + /* Prepare hash value */ + for (hy = hx = i = 0; i < d; ++i) { + hx = KR_REHASH(0, casetable[x[i]], hx); + hy = KR_REHASH(0, casetable[s[i]], hy); + } + /* Searching */ + while (hx != hy || rb_memcicmp(x, s, m)) { + if (s >= e) return -1; + hy = KR_REHASH(casetable[*s], casetable[*(s+d)], hy); + s++; + } + } + else { + if (n == m) { + return memcmp(x, s, m) == 0 ? 
0 : -1; + } + /* Prepare hash value */ + for (hy = hx = i = 0; i < d; ++i) { + hx = KR_REHASH(0, x[i], hx); + hy = KR_REHASH(0, s[i], hy); + } + /* Searching */ + while (hx != hy || memcmp(x, s, m)) { + if (s >= e) return -1; + hy = KR_REHASH(*s, *(s+d), hy); + s++; + } + } + return s-y; +} + +#define REG_CASESTATE FL_USER0 +#define KCODE_NONE 0 +#define KCODE_EUC FL_USER1 +#define KCODE_SJIS FL_USER2 +#define KCODE_UTF8 FL_USER3 +#define KCODE_FIXED FL_USER4 +#define KCODE_MASK (KCODE_EUC|KCODE_SJIS|KCODE_UTF8) + +static int reg_kcode = DEFAULT_KCODE; + +static void +kcode_euc(re) + struct RRegexp *re; +{ + FL_UNSET(re, KCODE_MASK); + FL_SET(re, KCODE_EUC); + FL_SET(re, KCODE_FIXED); +} + +static void +kcode_sjis(re) + struct RRegexp *re; +{ + FL_UNSET(re, KCODE_MASK); + FL_SET(re, KCODE_SJIS); + FL_SET(re, KCODE_FIXED); +} + +static void +kcode_utf8(re) + struct RRegexp *re; +{ + FL_UNSET(re, KCODE_MASK); + FL_SET(re, KCODE_UTF8); + FL_SET(re, KCODE_FIXED); +} + +static void +kcode_none(re) + struct RRegexp *re; +{ + FL_UNSET(re, KCODE_MASK); + FL_SET(re, KCODE_FIXED); +} + +static int curr_kcode; + +static void +kcode_set_option(re) + VALUE re; +{ + if (!FL_TEST(re, KCODE_FIXED)) return; + + curr_kcode = RBASIC(re)->flags & KCODE_MASK; + if (reg_kcode == curr_kcode) return; + switch (curr_kcode) { + case KCODE_NONE: + onigenc_set_default_encoding(ONIG_ENCODING_ASCII); + break; + case KCODE_EUC: + onigenc_set_default_encoding(ONIG_ENCODING_EUC_JP); + break; + case KCODE_SJIS: + onigenc_set_default_encoding(ONIG_ENCODING_SJIS); + break; + case KCODE_UTF8: + onigenc_set_default_encoding(ONIG_ENCODING_UTF8); + break; + } +} + +static void +kcode_reset_option() +{ + if (reg_kcode == curr_kcode) return; + switch (reg_kcode) { + case KCODE_NONE: + onigenc_set_default_encoding(ONIG_ENCODING_ASCII); + break; + case KCODE_EUC: + onigenc_set_default_encoding(ONIG_ENCODING_EUC_JP); + break; + case KCODE_SJIS: + onigenc_set_default_encoding(ONIG_ENCODING_SJIS); + break; + 
case KCODE_UTF8: + onigenc_set_default_encoding(ONIG_ENCODING_UTF8); + break; + } +} + +int +rb_reg_mbclen2(c, re) + unsigned int c; + VALUE re; +{ + int len; + unsigned char uc = (unsigned char)c; + + if (!FL_TEST(re, KCODE_FIXED)) + return mbclen(uc); + kcode_set_option(re); + len = mbclen(uc); + kcode_reset_option(); + return len; +} + +static void +rb_reg_check(re) + VALUE re; +{ + if (!RREGEXP(re)->ptr || !RREGEXP(re)->str) { + rb_raise(rb_eTypeError, "uninitialized Regexp"); + } +} + +static void +rb_reg_expr_str(str, s, len) + VALUE str; + const char *s; + long len; +{ + const char *p, *pend; + int need_escape = 0; + + p = s; pend = p + len; + while (p<pend) { + if (*p == '/' || (!ISPRINT(*p) && !ismbchar(*p))) { + need_escape = 1; + break; + } + p += mbclen(*p); + } + if (!need_escape) { + rb_str_buf_cat(str, s, len); + } + else { + p = s; + while (p<pend) { + if (*p == '\\') { + int n = mbclen(p[1]) + 1; + rb_str_buf_cat(str, p, n); + p += n; + continue; + } + else if (*p == '/') { + char c = '\\'; + rb_str_buf_cat(str, &c, 1); + rb_str_buf_cat(str, p, 1); + } + else if (ismbchar(*p)) { + rb_str_buf_cat(str, p, mbclen(*p)); + p += mbclen(*p); + continue; + } + else if (ISPRINT(*p)) { + rb_str_buf_cat(str, p, 1); + } + else if (!ISSPACE(*p)) { + char b[8]; + + sprintf(b, "\\%03o", *p & 0377); + rb_str_buf_cat(str, b, 4); + } + else { + rb_str_buf_cat(str, p, 1); + } + p++; + } + } +} + +static VALUE +rb_reg_desc(s, len, re) + const char *s; + long len; + VALUE re; +{ + VALUE str = rb_str_buf_new2("/"); + + rb_reg_expr_str(str, s, len); + rb_str_buf_cat2(str, "/"); + if (re) { + rb_reg_check(re); + if (RREGEXP(re)->ptr->options & ONIG_OPTION_MULTILINE) + rb_str_buf_cat2(str, "m"); + if (RREGEXP(re)->ptr->options & ONIG_OPTION_IGNORECASE) + rb_str_buf_cat2(str, "i"); + if (RREGEXP(re)->ptr->options & ONIG_OPTION_EXTEND) + rb_str_buf_cat2(str, "x"); + + if (FL_TEST(re, KCODE_FIXED)) { + switch ((RBASIC(re)->flags & KCODE_MASK)) { + case KCODE_NONE: + 
rb_str_buf_cat2(str, "n"); + break; + case KCODE_EUC: + rb_str_buf_cat2(str, "e"); + break; + case KCODE_SJIS: + rb_str_buf_cat2(str, "s"); + break; + case KCODE_UTF8: + rb_str_buf_cat2(str, "u"); + break; + } + } + } + OBJ_INFECT(str, re); + return str; +} + + +/* + * call-seq: + * rxp.source => str + * + * Returns the original string of the pattern. + * + * /ab+c/ix.source #=> "ab+c" + */ + +static VALUE +rb_reg_source(re) + VALUE re; +{ + VALUE str; + + rb_reg_check(re); + str = rb_str_new(RREGEXP(re)->str,RREGEXP(re)->len); + if (OBJ_TAINTED(re)) OBJ_TAINT(str); + return str; +} + +/* + * call-seq: + * rxp.inspect => string + * + * Produce a nicely formatted string-version of _rxp_. Perhaps surprisingly, + * <code>#inspect</code> actually produces the more natural version of + * the string than <code>#to_s</code>. + * + * /ab+c/ix.to_s #=> /ab+c/ix +*/ + +static VALUE +rb_reg_inspect(re) + VALUE re; +{ + rb_reg_check(re); + return rb_reg_desc(RREGEXP(re)->str, RREGEXP(re)->len, re); +} + + +/* + * call-seq: + * rxp.to_s => str + * + * Returns a string containing the regular expression and its options (using the + * <code>(?xxx:yyy)</code> notation. This string can be fed back in to + * <code>Regexp::new</code> to a regular expression with the same semantics as + * the original. (However, <code>Regexp#==</code> may not return true when + * comparing the two, as the source of the regular expression itself may + * differ, as the example shows). <code>Regexp#inspect</code> produces a + * generally more readable version of <i>rxp</i>. 
+ * + * r1 = /ab+c/ix #=> /ab+c/ix + * s1 = r1.to_s #=> "(?ix-m:ab+c)" + * r2 = Regexp.new(s1) #=> /(?ix-m:ab+c)/ + * r1 == r2 #=> false + * r1.source #=> "ab+c" + * r2.source #=> "(?ix-m:ab+c)" + */ + +static VALUE +rb_reg_to_s(re) + VALUE re; +{ + int options; + const int embeddable = ONIG_OPTION_MULTILINE|ONIG_OPTION_IGNORECASE|ONIG_OPTION_EXTEND; + long len; + const char* ptr; + VALUE str = rb_str_buf_new2("(?"); + + rb_reg_check(re); + + options = RREGEXP(re)->ptr->options; + ptr = RREGEXP(re)->str; + len = RREGEXP(re)->len; + again: + if (len >= 4 && ptr[0] == '(' && ptr[1] == '?') { + int err = 1; + ptr += 2; + if ((len -= 2) > 0) { + do { + if (*ptr == 'm') { + options |= ONIG_OPTION_MULTILINE; + } + else if (*ptr == 'i') { + options |= ONIG_OPTION_IGNORECASE; + } + else if (*ptr == 'x') { + options |= ONIG_OPTION_EXTEND; + } + else break; + ++ptr; + } while (--len > 0); + } + if (len > 1 && *ptr == '-') { + ++ptr; + --len; + do { + if (*ptr == 'm') { + options &= ~ONIG_OPTION_MULTILINE; + } + else if (*ptr == 'i') { + options &= ~ONIG_OPTION_IGNORECASE; + } + else if (*ptr == 'x') { + options &= ~ONIG_OPTION_EXTEND; + } + else break; + ++ptr; + } while (--len > 0); + } + if (*ptr == ')') { + --len; + ++ptr; + goto again; + } + if (*ptr == ':' && ptr[len-1] == ')') { + int r; + Regexp *rp; + kcode_set_option(re); + r = onig_alloc_init(&rp, ONIG_OPTION_DEFAULT, + ONIGENC_AMBIGUOUS_MATCH_DEFAULT, + onigenc_get_default_encoding(), + OnigDefaultSyntax); + if (r == 0) { + ++ptr; + len -= 2; + err = (onig_compile(rp, ptr, ptr + len, NULL) != 0); + } + kcode_reset_option(); + onig_free(rp); + } + if (err) { + options = RREGEXP(re)->ptr->options; + ptr = RREGEXP(re)->str; + len = RREGEXP(re)->len; + } + } + + if (options & ONIG_OPTION_MULTILINE) rb_str_buf_cat2(str, "m"); + if (options & ONIG_OPTION_IGNORECASE) rb_str_buf_cat2(str, "i"); + if (options & ONIG_OPTION_EXTEND) rb_str_buf_cat2(str, "x"); + + if ((options & embeddable) != embeddable) { + 
rb_str_buf_cat2(str, "-"); + if (!(options & ONIG_OPTION_MULTILINE)) rb_str_buf_cat2(str, "m"); + if (!(options & ONIG_OPTION_IGNORECASE)) rb_str_buf_cat2(str, "i"); + if (!(options & ONIG_OPTION_EXTEND)) rb_str_buf_cat2(str, "x"); + } + + rb_str_buf_cat2(str, ":"); + rb_reg_expr_str(str, ptr, len); + rb_str_buf_cat2(str, ")"); + + OBJ_INFECT(str, re); + return str; +} + +static void +rb_reg_raise(s, len, err, re, ce) + const char *s; + long len; + const char *err; + VALUE re; + int ce; +{ + VALUE desc = rb_reg_desc(s, len, re); + + if (ce) + rb_compile_error("%s: %s", err, RSTRING(desc)->ptr); + else + rb_raise(rb_eRegexpError, "%s: %s", err, RSTRING(desc)->ptr); +} + + +/* + * call-seq: + * rxp.casefold? => true or false + * + * Returns the value of the case-insensitive flag. + */ + +static VALUE +rb_reg_casefold_p(re) + VALUE re; +{ + rb_reg_check(re); + if (RREGEXP(re)->ptr->options & ONIG_OPTION_IGNORECASE) return Qtrue; + return Qfalse; +} + + +/* + * call-seq: + * rxp.options => fixnum + * + * Returns the set of bits corresponding to the options used when creating this + * Regexp (see <code>Regexp::new</code> for details. Note that additional bits + * may be set in the returned options: these are used internally by the regular + * expression code. These extra bits are ignored if the options are passed to + * <code>Regexp::new</code>. + * + * Regexp::IGNORECASE #=> 1 + * Regexp::EXTENDED #=> 2 + * Regexp::MULTILINE #=> 4 + * + * /cat/.options #=> 128 + * /cat/ix.options #=> 131 + * Regexp.new('cat', true).options #=> 129 + * Regexp.new('cat', 0, 's').options #=> 384 + * + * r = /cat/ix + * Regexp.new(r.source, r.options) #=> /cat/ix + */ + +static VALUE +rb_reg_options_m(re) + VALUE re; +{ + int options = rb_reg_options(re); + return INT2NUM(options); +} + + +/* + * call-seq: + * rxp.kcode => str + * + * Returns the character set code for the regexp. 
+ */ + +static VALUE +rb_reg_kcode_m(re) + VALUE re; +{ + char *kcode; + + if (FL_TEST(re, KCODE_FIXED)) { + switch (RBASIC(re)->flags & KCODE_MASK) { + case KCODE_NONE: + kcode = "none"; break; + case KCODE_EUC: + kcode = "euc"; break; + case KCODE_SJIS: + kcode = "sjis"; break; + case KCODE_UTF8: + kcode = "utf8"; break; + default: + rb_bug("unknown kcode - should not happen"); + break; + } + return rb_str_new2(kcode); + } + return Qnil; +} + +static Regexp* +make_regexp(s, len, flags, ce) + const char *s; + long len; + int flags; + int ce; +{ + Regexp *rp; + char err[ONIG_MAX_ERROR_MESSAGE_LEN]; + int r; + OnigErrorInfo einfo; + + /* Handle escaped characters first. */ + + /* Build a copy of the string (in dest) with the + escaped characters translated, and generate the regex + from that. + */ + + r = onig_alloc_init(&rp, flags, + ONIGENC_AMBIGUOUS_MATCH_DEFAULT, + onigenc_get_default_encoding(), + OnigDefaultSyntax); + if (r) { + onig_error_code_to_str((UChar* )err, r); + rb_reg_raise(s, len, err, 0, ce); + } + + r = onig_compile(rp, (UChar* )s, (UChar* )(s + len), &einfo); + + if (r != 0) { + onig_free(rp); + (void )onig_error_code_to_str((UChar* )err, r, &einfo); + rb_reg_raise(s, len, err, 0, ce); + } + return rp; +} + + +/* + * Document-class: MatchData + * + * <code>MatchData</code> is the type of the special variable <code>$~</code>, + * and is the type of the object returned by <code>Regexp#match</code> and + * <code>Regexp#last_match</code>. It encapsulates all the results of a pattern + * match, results normally accessed through the special variables + * <code>$&</code>, <code>$'</code>, <code>$`</code>, <code>$1</code>, + * <code>$2</code>, and so on. <code>Matchdata</code> is also known as + * <code>MatchingData</code>. 
+ * + */ + +static VALUE rb_cMatch; + +static VALUE match_alloc _((VALUE)); +static VALUE +match_alloc(klass) + VALUE klass; +{ + NEWOBJ(match, struct RMatch); + OBJSETUP(match, klass, T_MATCH); + + match->str = 0; + match->regs = 0; + match->regs = ALLOC(struct re_registers); + MEMZERO(match->regs, struct re_registers, 1); + + return (VALUE)match; +} + +/* :nodoc: */ +static VALUE +match_init_copy(obj, orig) + VALUE obj, orig; +{ + if (obj == orig) return obj; + + if (!rb_obj_is_instance_of(orig, rb_obj_class(obj))) { + rb_raise(rb_eTypeError, "wrong argument class"); + } + RMATCH(obj)->str = RMATCH(orig)->str; + onig_region_free(RMATCH(obj)->regs, 0); + RMATCH(obj)->regs->allocated = 0; + onig_region_copy(RMATCH(obj)->regs, RMATCH(orig)->regs); + + return obj; +} + + +/* + * call-seq: + * mtch.length => integer + * mtch.size => integer + * + * Returns the number of elements in the match array. + * + * m = /(.)(.)(\d+)(\d)/.match("THX1138.") + * m.length #=> 5 + * m.size #=> 5 + */ + +static VALUE +match_size(match) + VALUE match; +{ + return INT2FIX(RMATCH(match)->regs->num_regs); +} + + +/* + * call-seq: + * mtch.offset(n) => array + * + * Returns a two-element array containing the beginning and ending offsets of + * the <em>n</em>th match. + * + * m = /(.)(.)(\d+)(\d)/.match("THX1138.") + * m.offset(0) #=> [1, 7] + * m.offset(4) #=> [6, 7] + */ + +static VALUE +match_offset(match, n) + VALUE match, n; +{ + int i = NUM2INT(n); + + if (i < 0 || RMATCH(match)->regs->num_regs <= i) + rb_raise(rb_eIndexError, "index %d out of matches", i); + + if (RMATCH(match)->regs->beg[i] < 0) + return rb_assoc_new(Qnil, Qnil); + + return rb_assoc_new(INT2FIX(RMATCH(match)->regs->beg[i]), + INT2FIX(RMATCH(match)->regs->end[i])); +} + + +/* + * call-seq: + * mtch.begin(n) => integer + * + * Returns the offset of the start of the <em>n</em>th element of the match + * array in the string. 
+ * + * m = /(.)(.)(\d+)(\d)/.match("THX1138.") + * m.begin(0) #=> 1 + * m.begin(2) #=> 2 + */ + +static VALUE +match_begin(match, n) + VALUE match, n; +{ + int i = NUM2INT(n); + + if (i < 0 || RMATCH(match)->regs->num_regs <= i) + rb_raise(rb_eIndexError, "index %d out of matches", i); + + if (RMATCH(match)->regs->beg[i] < 0) + return Qnil; + + return INT2FIX(RMATCH(match)->regs->beg[i]); +} + + +/* + * call-seq: + * mtch.end(n) => integer + * + * Returns the offset of the character immediately following the end of the + * <em>n</em>th element of the match array in the string. + * + * m = /(.)(.)(\d+)(\d)/.match("THX1138.") + * m.end(0) #=> 7 + * m.end(2) #=> 3 + */ + +static VALUE +match_end(match, n) + VALUE match, n; +{ + int i = NUM2INT(n); + + if (i < 0 || RMATCH(match)->regs->num_regs <= i) + rb_raise(rb_eIndexError, "index %d out of matches", i); + + if (RMATCH(match)->regs->beg[i] < 0) + return Qnil; + + return INT2FIX(RMATCH(match)->regs->end[i]); +} + +#define MATCH_BUSY FL_USER2 + +void +rb_match_busy(match) + VALUE match; +{ + FL_SET(match, MATCH_BUSY); +} + +int ruby_ignorecase; +static int may_need_recompile; + +static void +rb_reg_prepare_re(re) + VALUE re; +{ + int need_recompile = 0; + int state; + + rb_reg_check(re); + state = FL_TEST(re, REG_CASESTATE); + /* ignorecase status */ + if (ruby_ignorecase && !state) { + FL_SET(re, REG_CASESTATE); + RREGEXP(re)->ptr->options |= ONIG_OPTION_IGNORECASE; + need_recompile = 1; + } + if (!ruby_ignorecase && state) { + FL_UNSET(re, REG_CASESTATE); + RREGEXP(re)->ptr->options &= ~ONIG_OPTION_IGNORECASE; + need_recompile = 1; + } + + if (!FL_TEST(re, KCODE_FIXED) && + (RBASIC(re)->flags & KCODE_MASK) != reg_kcode) { + need_recompile = 1; + RBASIC(re)->flags &= ~KCODE_MASK; + RBASIC(re)->flags |= reg_kcode; + } + + if (need_recompile) { + char err[ONIG_MAX_ERROR_MESSAGE_LEN]; + int r; + OnigErrorInfo einfo; + regex_t *reg; + UChar *pattern; + + if (FL_TEST(re, KCODE_FIXED)) + kcode_set_option(re); + 
rb_reg_check(re); + reg = RREGEXP(re)->ptr; + pattern = ((UChar* )RREGEXP(re)->str); + r = onig_recompile(reg, pattern, pattern + RREGEXP(re)->len, + reg->options, onigenc_get_default_encoding(), + OnigDefaultSyntax, &einfo); + + if (r != 0) { + (void )onig_error_code_to_str((UChar* )err, r, &einfo); + rb_reg_raise(pattern, RREGEXP(re)->len, err, re, Qfalse); + } + } +} + +long +rb_reg_adjust_startpos(re, str, pos, reverse) + VALUE re, str; + long pos, reverse; +{ + long range; + OnigEncoding enc; + UChar *p, *string; + + rb_reg_check(re); + if (may_need_recompile) rb_reg_prepare_re(re); + + if (FL_TEST(re, KCODE_FIXED)) + kcode_set_option(re); + else if (reg_kcode != curr_kcode) + kcode_reset_option(); + + if (reverse) { + range = -pos; + } + else { + range = RSTRING(str)->len - pos; + } + + enc = (RREGEXP(re)->ptr)->enc; + + if (pos > 0 && ONIGENC_MBC_MAXLEN(enc) != 1 && pos < RSTRING(str)->len) { + string = (UChar* )RSTRING(str)->ptr; + + if (range > 0) { + p = onigenc_get_right_adjust_char_head(enc, string, string + pos); + } + else { + p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, string, string + pos); + } + return p - string; + } + + return pos; +} + +long +rb_reg_search(re, str, pos, reverse) + VALUE re, str; + long pos, reverse; +{ + long result; + VALUE match; + static struct re_registers regs; + long range; + + if (pos > RSTRING(str)->len || pos < 0) { + rb_backref_set(Qnil); + return -1; + } + + rb_reg_check(re); + if (may_need_recompile) rb_reg_prepare_re(re); + + if (FL_TEST(re, KCODE_FIXED)) + kcode_set_option(re); + else if (reg_kcode != curr_kcode) + kcode_reset_option(); + + if (reverse) { + range = -pos; + } + else { + range = RSTRING(str)->len - pos; + } + + result = onig_search(RREGEXP(re)->ptr, + (UChar* )(RSTRING(str)->ptr), + ((UChar* )(RSTRING(str)->ptr) + RSTRING(str)->len), + ((UChar* )(RSTRING(str)->ptr) + pos), + ((UChar* )(RSTRING(str)->ptr) + pos + range), + &regs, ONIG_OPTION_NONE); + + if (FL_TEST(re, KCODE_FIXED)) + kcode_reset_option(); + 
if (result < 0) { + if (result == ONIG_MISMATCH) { + rb_backref_set(Qnil); + return result; + } + else { + char err[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str((UChar* )err, result); + rb_reg_raise(RREGEXP(re)->str, RREGEXP(re)->len, err, 0, Qfalse); + } + } + + match = rb_backref_get(); + if (NIL_P(match) || FL_TEST(match, MATCH_BUSY)) { + match = match_alloc(rb_cMatch); + } + else { + if (rb_safe_level() >= 3) + OBJ_TAINT(match); + else + FL_UNSET(match, FL_TAINT); + } + + onig_region_copy(RMATCH(match)->regs, &regs); + RMATCH(match)->str = rb_str_new4(str); + rb_backref_set(match); + + OBJ_INFECT(match, re); + OBJ_INFECT(match, str); + return result; +} + +VALUE +rb_reg_nth_defined(nth, match) + int nth; + VALUE match; +{ + if (NIL_P(match)) return Qnil; + if (nth >= RMATCH(match)->regs->num_regs) { + return Qnil; + } + if (nth < 0) { + nth += RMATCH(match)->regs->num_regs; + if (nth <= 0) return Qnil; + } + if (RMATCH(match)->BEG(nth) == -1) return Qfalse; + return Qtrue; +} + +VALUE +rb_reg_nth_match(nth, match) + int nth; + VALUE match; +{ + VALUE str; + long start, end, len; + + if (NIL_P(match)) return Qnil; + if (nth >= RMATCH(match)->regs->num_regs) { + return Qnil; + } + if (nth < 0) { + nth += RMATCH(match)->regs->num_regs; + if (nth <= 0) return Qnil; + } + start = RMATCH(match)->BEG(nth); + if (start == -1) return Qnil; + end = RMATCH(match)->END(nth); + len = end - start; + str = rb_str_substr(RMATCH(match)->str, start, len); + OBJ_INFECT(str, match); + return str; +} + +VALUE +rb_reg_last_match(match) + VALUE match; +{ + return rb_reg_nth_match(0, match); +} + + +/* + * call-seq: + * mtch.pre_match => str + * + * Returns the portion of the original string before the current match. + * Equivalent to the special variable <code>$`</code>. 
+ * + * m = /(.)(.)(\d+)(\d)/.match("THX1138.") + * m.pre_match #=> "T" + */ + +VALUE +rb_reg_match_pre(match) + VALUE match; +{ + VALUE str; + + if (NIL_P(match)) return Qnil; + if (RMATCH(match)->BEG(0) == -1) return Qnil; + str = rb_str_substr(RMATCH(match)->str, 0, RMATCH(match)->BEG(0)); + if (OBJ_TAINTED(match)) OBJ_TAINT(str); + return str; +} + + +/* + * call-seq: + * mtch.post_match => str + * + * Returns the portion of the original string after the current match. + * Equivalent to the special variable <code>$'</code>. + * + * m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie") + * m.post_match #=> ": The Movie" + */ + +VALUE +rb_reg_match_post(match) + VALUE match; +{ + VALUE str; + long pos; + + if (NIL_P(match)) return Qnil; + if (RMATCH(match)->BEG(0) == -1) return Qnil; + str = RMATCH(match)->str; + pos = RMATCH(match)->END(0); + str = rb_str_substr(str, pos, RSTRING(str)->len - pos); + if (OBJ_TAINTED(match)) OBJ_TAINT(str); + return str; +} + +VALUE +rb_reg_match_last(match) + VALUE match; +{ + int i; + + if (NIL_P(match)) return Qnil; + if (RMATCH(match)->BEG(0) == -1) return Qnil; + + for (i=RMATCH(match)->regs->num_regs-1; RMATCH(match)->BEG(i) == -1 && i > 0; i--) + ; + if (i == 0) return Qnil; + return rb_reg_nth_match(i, match); +} + +static VALUE +last_match_getter() +{ + return rb_reg_last_match(rb_backref_get()); +} + +static VALUE +prematch_getter() +{ + return rb_reg_match_pre(rb_backref_get()); +} + +static VALUE +postmatch_getter() +{ + return rb_reg_match_post(rb_backref_get()); +} + +static VALUE +last_paren_match_getter() +{ + return rb_reg_match_last(rb_backref_get()); +} + +static VALUE +match_array(match, start) + VALUE match; + int start; +{ + struct re_registers *regs = RMATCH(match)->regs; + VALUE ary = rb_ary_new2(regs->num_regs); + VALUE target = RMATCH(match)->str; + int i; + int taint = OBJ_TAINTED(match); + + for (i=start; i<regs->num_regs; i++) { + if (regs->beg[i] == -1) { + rb_ary_push(ary, Qnil); + } + else { + VALUE str 
= rb_str_substr(target, regs->beg[i], regs->end[i]-regs->beg[i]); + if (taint) OBJ_TAINT(str); + rb_ary_push(ary, str); + } + } + return ary; +} + + +/* [MG]:FIXME: I put parens around the /.../.match() in the first line of the + second example to prevent the '*' followed by a '/' from ending the + comment. */ + +/* + * call-seq: + * mtch.to_a => anArray + * + * Returns the array of matches. + * + * m = /(.)(.)(\d+)(\d)/.match("THX1138.") + * m.to_a #=> ["HX1138", "H", "X", "113", "8"] + * + * Because <code>to_a</code> is called when expanding + * <code>*</code><em>variable</em>, there's a useful assignment + * shortcut for extracting matched fields. This is slightly slower than + * accessing the fields directly (as an intermediate array is + * generated). + * + * all,f1,f2,f3 = *(/(.)(.)(\d+)(\d)/.match("THX1138.")) + * all #=> "HX1138" + * f1 #=> "H" + * f2 #=> "X" + * f3 #=> "113" + */ + +static VALUE +match_to_a(match) + VALUE match; +{ + return match_array(match, 0); +} + + +/* + * call-seq: + * mtch.captures => array + * + * Returns the array of captures; equivalent to <code>mtch.to_a[1..-1]</code>. + * + * f1,f2,f3,f4 = /(.)(.)(\d+)(\d)/.match("THX1138.").captures + * f1 #=> "H" + * f2 #=> "X" + * f3 #=> "113" + * f4 #=> "8" + */ +static VALUE +match_captures(match) + VALUE match; +{ + return match_array(match, 1); +} + + +/* + * call-seq: + * mtch[i] => obj + * mtch[start, length] => array + * mtch[range] => array + * + * Match Reference---<code>MatchData</code> acts as an array, and may be + * accessed using the normal array indexing techniques. <i>mtch</i>[0] is + * equivalent to the special variable <code>$&</code>, and returns the entire + * matched string. <i>mtch</i>[1], <i>mtch</i>[2], and so on return the values + * of the matched backreferences (portions of the pattern between parentheses). 
+ * + * m = /(.)(.)(\d+)(\d)/.match("THX1138.") + * m[0] #=> "HX1138" + * m[1, 2] #=> ["H", "X"] + * m[1..3] #=> ["H", "X", "113"] + * m[-3, 2] #=> ["X", "113"] + */ + +static VALUE +match_aref(argc, argv, match) + int argc; + VALUE *argv; + VALUE match; +{ + VALUE idx, rest; + + rb_scan_args(argc, argv, "11", &idx, &rest); + + if (!NIL_P(rest) || !FIXNUM_P(idx) || FIX2INT(idx) < 0) { + return rb_ary_aref(argc, argv, match_to_a(match)); + } + return rb_reg_nth_match(FIX2INT(idx), match); +} + +static VALUE match_entry _((VALUE, long)); +static VALUE +match_entry(match, n) + VALUE match; + long n; +{ + return rb_reg_nth_match(n, match); +} + + +/* + * call-seq: + * mtch.select([index]*) => array + * + * Uses each <i>index</i> to access the matching values, returning an array of + * the corresponding matches. + * + * m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie") + * m.to_a #=> ["HX1138", "H", "X", "113", "8"] + * m.select(0, 2, -2) #=> ["HX1138", "X", "113"] + */ + +static VALUE +match_values_at(argc, argv, match) + int argc; + VALUE *argv; + VALUE match; +{ + return rb_get_values_at(match, RMATCH(match)->regs->num_regs, argc, argv, match_entry); +} + + +/* + * call-seq: + * mtch.select([index]*) => array + * + * Uses each <i>index</i> to access the matching values, returning an + * array of the corresponding matches. 
+ * + * m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie") + * m.to_a #=> ["HX1138", "H", "X", "113", "8"] + * m.select(0, 2, -2) #=> ["HX1138", "X", "113"] + */ + +static VALUE +match_select(argc, argv, match) + int argc; + VALUE *argv; + VALUE match; +{ + if (argc > 0) { + rb_raise(rb_eArgError, "wrong number of arguments (%d for 0)", argc); + } + else { + struct re_registers *regs = RMATCH(match)->regs; + VALUE target = RMATCH(match)->str; + VALUE result = rb_ary_new(); + int i; + int taint = OBJ_TAINTED(match); + + for (i=0; i<regs->num_regs; i++) { + VALUE str = rb_str_substr(target, regs->beg[i], regs->end[i]-regs->beg[i]); + if (taint) OBJ_TAINT(str); + if (RTEST(rb_yield(str))) { + rb_ary_push(result, str); + } + } + return result; + } +} + + +/* + * call-seq: + * mtch.to_s => str + * + * Returns the entire matched string. + * + * m = /(.)(.)(\d+)(\d)/.match("THX1138.") + * m.to_s #=> "HX1138" + */ + +static VALUE +match_to_s(match) + VALUE match; +{ + VALUE str = rb_reg_last_match(match); + + if (NIL_P(str)) str = rb_str_new(0,0); + if (OBJ_TAINTED(match)) OBJ_TAINT(str); + if (OBJ_TAINTED(RMATCH(match)->str)) OBJ_TAINT(str); + return str; +} + + +/* + * call-seq: + * mtch.string => str + * + * Returns a frozen copy of the string passed in to <code>match</code>. + * + * m = /(.)(.)(\d+)(\d)/.match("THX1138.") + * m.string #=> "THX1138." 
+ */ + +static VALUE +match_string(match) + VALUE match; +{ + return RMATCH(match)->str; /* str is frozen */ +} + +VALUE rb_cRegexp; + +static void +rb_reg_initialize(obj, s, len, options, ce) + VALUE obj; + const char *s; + long len; + int options; /* CASEFOLD = 1 */ + /* EXTENDED = 2 */ + /* MULTILINE = 4 */ + /* CODE_NONE = 16 */ + /* CODE_EUC = 32 */ + /* CODE_SJIS = 48 */ + /* CODE_UTF8 = 64 */ + int ce; /* call rb_compile_error() */ +{ + struct RRegexp *re = RREGEXP(obj); + + if (re->ptr) onig_free(re->ptr); + if (re->str) free(re->str); + re->ptr = 0; + re->str = 0; + + switch (options & ~0xf) { + case 0: + default: + FL_SET(re, reg_kcode); + break; + case 16: + kcode_none(re); + break; + case 32: + kcode_euc(re); + break; + case 48: + kcode_sjis(re); + break; + case 64: + kcode_utf8(re); + break; + } + + if (options & ~0xf) { + kcode_set_option((VALUE)re); + } + if (ruby_ignorecase) { + options |= ONIG_OPTION_IGNORECASE; + FL_SET(re, REG_CASESTATE); + } + re->ptr = make_regexp(s, len, options & 0xf, ce); + re->str = ALLOC_N(char, len+1); + memcpy(re->str, s, len); + re->str[len] = '\0'; + re->len = len; + if (options & ~0xf) { + kcode_reset_option(); + } +} + +static VALUE rb_reg_s_alloc _((VALUE)); +static VALUE +rb_reg_s_alloc(klass) + VALUE klass; +{ + NEWOBJ(re, struct RRegexp); + OBJSETUP(re, klass, T_REGEXP); + + re->ptr = 0; + re->len = 0; + re->str = 0; + + return (VALUE)re; +} + +VALUE +rb_reg_new(s, len, options) + const char *s; + long len; + int options; +{ + VALUE re = rb_reg_s_alloc(rb_cRegexp); + + rb_reg_initialize(re, s, len, options, Qfalse); + return (VALUE)re; +} + +VALUE +rb_reg_compile(s, len, options) + const char *s; + long len; + int options; +{ + VALUE re = rb_reg_s_alloc(rb_cRegexp); + + rb_reg_initialize(re, s, len, options, Qtrue); + return (VALUE)re; +} + +static int case_cache; +static int kcode_cache; +static VALUE reg_cache; + +VALUE +rb_reg_regcomp(str) + VALUE str; +{ + if (reg_cache && RREGEXP(reg_cache)->len == 
RSTRING(str)->len + && case_cache == ruby_ignorecase + && kcode_cache == reg_kcode + && memcmp(RREGEXP(reg_cache)->str, RSTRING(str)->ptr, RSTRING(str)->len) == 0) + return reg_cache; + + case_cache = ruby_ignorecase; + kcode_cache = reg_kcode; + return reg_cache = rb_reg_new(RSTRING(str)->ptr, RSTRING(str)->len, + ruby_ignorecase); +} + +static int +rb_reg_cur_kcode(re) + VALUE re; +{ + if (FL_TEST(re, KCODE_FIXED)) { + return RBASIC(re)->flags & KCODE_MASK; + } + return 0; +} + +/* + * call-seq: + * rxp.hash => fixnum + * + * Produce a hash based on the text and options of this regular expression. + */ + +static VALUE +rb_reg_hash(re) + VALUE re; +{ + int hashval, len; + char *p; + + rb_reg_check(re); + hashval = RREGEXP(re)->ptr->options; + len = RREGEXP(re)->len; + p = RREGEXP(re)->str; + while (len--) { + hashval = hashval * 33 + *p++; + } + hashval = hashval + (hashval>>5); + + return INT2FIX(hashval); +} + + +/* + * call-seq: + * rxp == other_rxp => true or false + * rxp.eql?(other_rxp) => true or false + * + * Equality---Two regexps are equal if their patterns are identical, they have + * the same character set code, and their <code>casefold?</code> values are the + * same. 
+ * + * /abc/ == /abc/x #=> false + * /abc/ == /abc/i #=> false + * /abc/u == /abc/n #=> false + */ + +static VALUE +rb_reg_equal(re1, re2) + VALUE re1, re2; +{ + if (re1 == re2) return Qtrue; + if (TYPE(re2) != T_REGEXP) return Qfalse; + rb_reg_check(re1); rb_reg_check(re2); + if (RREGEXP(re1)->len != RREGEXP(re2)->len) return Qfalse; + if (memcmp(RREGEXP(re1)->str, RREGEXP(re2)->str, RREGEXP(re1)->len) == 0 && + rb_reg_cur_kcode(re1) == rb_reg_cur_kcode(re2) && + RREGEXP(re1)->ptr->options == RREGEXP(re2)->ptr->options) { + return Qtrue; + } + return Qfalse; +} + +static VALUE +rb_reg_match_pos(re, str, pos) + VALUE re, str; + long pos; +{ + if (NIL_P(str)) { + rb_backref_set(Qnil); + return Qnil; + } + StringValue(str); + if (pos != 0) { + if (pos < 0) { + pos += RSTRING(str)->len; + if (pos < 0) { + return Qnil; + } + } + pos = rb_reg_adjust_startpos(re, str, pos, 0); + } + pos = rb_reg_search(re, str, pos, 0); + if (pos < 0) { + return Qnil; + } + return LONG2FIX(pos); +} + +/* + * call-seq: + * rxp =~ str => integer or nil + * + * Match---Matches <i>rxp</i> against <i>str</i>. + * + * /at/ =~ "input data" #=> 7 + */ + +VALUE +rb_reg_match(re, str) + VALUE re, str; +{ + return rb_reg_match_pos(re, str, 0); +} + +/* + * call-seq: + * rxp === str => true or false + * + * Case Equality---Synonym for <code>Regexp#=~</code> used in case statements. 
+ * + * a = "HELLO" + * case a + * when /^[a-z]*$/; print "Lower case\n" + * when /^[A-Z]*$/; print "Upper case\n" + * else; print "Mixed case\n" + * end + * + * <em>produces:</em> + * + * Upper case + */ + +VALUE +rb_reg_eqq(re, str) + VALUE re, str; +{ + long start; + + if (TYPE(str) != T_STRING) { + str = rb_check_string_type(str); + if (NIL_P(str)) { + rb_backref_set(Qnil); + return Qfalse; + } + } + StringValue(str); + start = rb_reg_search(re, str, 0, 0); + if (start < 0) { + return Qfalse; + } + return Qtrue; +} + + +/* + * call-seq: + * ~ rxp => integer or nil + * + * Match---Matches <i>rxp</i> against the contents of <code>$_</code>. + * Equivalent to <code><i>rxp</i> =~ $_</code>. + * + * $_ = "input data" + * ~ /at/ #=> 7 + */ + +VALUE +rb_reg_match2(re) + VALUE re; +{ + long start; + VALUE line = rb_lastline_get(); + + if (TYPE(line) != T_STRING) { + rb_backref_set(Qnil); + return Qnil; + } + + start = rb_reg_search(re, line, 0, 0); + if (start < 0) { + return Qnil; + } + return LONG2FIX(start); +} + + +/* + * call-seq: + * rxp.match(str) => matchdata or nil + * rxp.match(str,pos) => matchdata or nil + * + * Returns a <code>MatchData</code> object describing the match, or + * <code>nil</code> if there was no match. This is equivalent to retrieving the + * value of the special variable <code>$~</code> following a normal match. + * If the second parameter is present, it specifies the position in the string + * to begin the search. 
+ * + * /(.)(.)(.)/.match("abc")[2] #=> "b" + * /(.)(.)/.match("abc", 1)[2] #=> "c" + */ + +static VALUE +rb_reg_match_m(argc, argv, re) + int argc; + VALUE *argv; + VALUE re; +{ + VALUE result, str, initpos; + long pos; + + if (rb_scan_args(argc, argv, "11", &str, &initpos) == 2) { + pos = NUM2LONG(initpos); + } + else { + pos = 0; + } + + result = rb_reg_match_pos(re, str, pos); + if (NIL_P(result)) { + rb_backref_set(Qnil); + return Qnil; + } + result = rb_backref_get(); + rb_match_busy(result); + return result; +} + +/* + * Document-method: compile + * + * Synonym for <code>Regexp.new</code> + */ + + +/* + * call-seq: + * Regexp.new(string [, options [, lang]]) => regexp + * Regexp.new(regexp) => regexp + * Regexp.compile(string [, options [, lang]]) => regexp + * Regexp.compile(regexp) => regexp + * + * Constructs a new regular expression from <i>pattern</i>, which can be either + * a <code>String</code> or a <code>Regexp</code> (in which case that regexp's + * options are propagated, and new options may not be specified (a change as of + * Ruby 1.8). If <i>options</i> is a <code>Fixnum</code>, it should be one or + * more of the constants <code>Regexp::EXTENDED</code>, + * <code>Regexp::IGNORECASE</code>, and <code>Regexp::MULTILINE</code>, + * <em>or</em>-ed together. Otherwise, if <i>options</i> is not + * <code>nil</code>, the regexp will be case insensitive. The <i>lang</i> + * parameter enables multibyte support for the regexp: `n', `N' = none, `e', + * `E' = EUC, `s', `S' = SJIS, `u', `U' = UTF-8. 
+ * + * r1 = Regexp.new('^a-z+:\\s+\w+') #=> /^a-z+:\s+\w+/ + * r2 = Regexp.new('cat', true) #=> /cat/i + * r3 = Regexp.new('dog', Regexp::EXTENDED) #=> /dog/x + * r4 = Regexp.new(r2) #=> /cat/i + */ + +static VALUE +rb_reg_initialize_m(argc, argv, self) + int argc; + VALUE *argv; + VALUE self; +{ + const char *s; + long len; + int flags = 0; + + rb_check_frozen(self); + if (argc == 0 || argc > 3) { + rb_raise(rb_eArgError, "wrong number of arguments"); + } + if (TYPE(argv[0]) == T_REGEXP) { + if (argc > 1) { + rb_warn("flags%s ignored", (argc == 3) ? " and encoding": ""); + } + rb_reg_check(argv[0]); + flags = RREGEXP(argv[0])->ptr->options & 0xf; + if (FL_TEST(argv[0], KCODE_FIXED)) { + switch (RBASIC(argv[0])->flags & KCODE_MASK) { + case KCODE_NONE: + flags |= 16; + break; + case KCODE_EUC: + flags |= 32; + break; + case KCODE_SJIS: + flags |= 48; + break; + case KCODE_UTF8: + flags |= 64; + break; + default: + break; + } + } + s = RREGEXP(argv[0])->str; + len = RREGEXP(argv[0])->len; + } + else { + if (argc >= 2) { + if (FIXNUM_P(argv[1])) flags = FIX2INT(argv[1]); + else if (RTEST(argv[1])) flags = ONIG_OPTION_IGNORECASE; + } + if (argc == 3 && !NIL_P(argv[2])) { + char *kcode = StringValuePtr(argv[2]); + + flags &= ~0x70; + switch (kcode[0]) { + case 'n': case 'N': + flags |= 16; + break; + case 'e': case 'E': + flags |= 32; + break; + case 's': case 'S': + flags |= 48; + break; + case 'u': case 'U': + flags |= 64; + break; + default: + break; + } + } + s = StringValuePtr(argv[0]); + len = RSTRING(argv[0])->len; + } + rb_reg_initialize(self, s, len, flags, Qfalse); + return self; +} + +VALUE +rb_reg_quote(str) + VALUE str; +{ + char *s, *send, *t; + VALUE tmp; + int c; + + s = RSTRING(str)->ptr; + send = s + RSTRING(str)->len; + for (; s < send; s++) { + c = *s; + if (ismbchar(*s)) { + int n = mbclen(*s); + + while (n-- && s < send) + s++; + s--; + continue; + } + switch (c) { + case '[': case ']': case '{': case '}': + case '(': case ')': case '|': case 
'-': + case '*': case '.': case '\\': + case '?': case '+': case '^': case '$': + case ' ': case '#': + case '\t': case '\f': case '\n': case '\r': + goto meta_found; + } + } + return str; + + meta_found: + tmp = rb_str_new(0, RSTRING(str)->len*2); + t = RSTRING(tmp)->ptr; + /* copy upto metacharacter */ + memcpy(t, RSTRING(str)->ptr, s - RSTRING(str)->ptr); + t += s - RSTRING(str)->ptr; + + for (; s < send; s++) { + c = *s; + if (ismbchar(*s)) { + int n = mbclen(*s); + + while (n-- && s < send) + *t++ = *s++; + s--; + continue; + } + switch (c) { + case '[': case ']': case '{': case '}': + case '(': case ')': case '|': case '-': + case '*': case '.': case '\\': + case '?': case '+': case '^': case '$': + case '#': + *t++ = '\\'; + break; + case ' ': + *t++ = '\\'; + *t++ = ' '; + continue; + case '\t': + *t++ = '\\'; + *t++ = 't'; + continue; + case '\n': + *t++ = '\\'; + *t++ = 'n'; + continue; + case '\r': + *t++ = '\\'; + *t++ = 'r'; + continue; + case '\f': + *t++ = '\\'; + *t++ = 'f'; + continue; + } + *t++ = c; + } + rb_str_resize(tmp, t - RSTRING(tmp)->ptr); + OBJ_INFECT(tmp, str); + return tmp; +} + + +/* + * call-seq: + * Regexp.escape(str) => a_str + * Regexp.quote(str) => a_str + * + * Escapes any characters that would have special meaning in a regular + * expression. Returns a new escaped string, or self if no characters are + * escaped. For any string, + * <code>Regexp.escape(<i>str</i>)=~<i>str</i></code> will be true. + * + * Regexp.escape('\\*?{}.') #=> \\\\\*\?\{\}\. 
+ */ + +static VALUE +rb_reg_s_quote(argc, argv) + int argc; + VALUE *argv; +{ + VALUE str, kcode; + int kcode_saved = reg_kcode; + + rb_scan_args(argc, argv, "11", &str, &kcode); + if (!NIL_P(kcode)) { + rb_set_kcode(StringValuePtr(kcode)); + curr_kcode = reg_kcode; + reg_kcode = kcode_saved; + } + StringValue(str); + str = rb_reg_quote(str); + kcode_reset_option(); + return str; +} + +int +rb_kcode() +{ + switch (reg_kcode) { + case KCODE_EUC: + return MBCTYPE_EUC; + case KCODE_SJIS: + return MBCTYPE_SJIS; + case KCODE_UTF8: + return MBCTYPE_UTF8; + case KCODE_NONE: + return MBCTYPE_ASCII; + } + rb_bug("wrong reg_kcode value (0x%x)", reg_kcode); +} + +static int +rb_reg_get_kcode(re) + VALUE re; +{ + switch (RBASIC(re)->flags & KCODE_MASK) { + case KCODE_NONE: + return 16; + case KCODE_EUC: + return 32; + case KCODE_SJIS: + return 48; + case KCODE_UTF8: + return 64; + default: + return 0; + } +} + +int +rb_reg_options(re) + VALUE re; +{ + int options; + + rb_reg_check(re); + options = RREGEXP(re)->ptr->options & + (ONIG_OPTION_IGNORECASE|ONIG_OPTION_MULTILINE|ONIG_OPTION_EXTEND); + if (FL_TEST(re, KCODE_FIXED)) { + options |= rb_reg_get_kcode(re); + } + return options; +} + + +/* + * call-seq: + * Regexp.union([pattern]*) => new_str + * + * Return a <code>Regexp</code> object that is the union of the given + * <em>pattern</em>s, i.e., will match any of its parts. The <em>pattern</em>s + * can be Regexp objects, in which case their options will be preserved, or + * Strings. If no arguments are given, returns <code>/(?!)/</code>. 
+ * + * Regexp.union #=> /(?!)/ + * Regexp.union("penzance") #=> /penzance/ + * Regexp.union("skiing", "sledding") #=> /skiing|sledding/ + * Regexp.union(/dogs/, /cats/i) #=> /(?-mix:dogs)|(?i-mx:cats)/ + */ +static VALUE +rb_reg_s_union(argc, argv) + int argc; + VALUE *argv; +{ + if (argc == 0) { + VALUE args[1]; + args[0] = rb_str_new2("(?!)"); + return rb_class_new_instance(1, args, rb_cRegexp); + } + else if (argc == 1) { + VALUE v; + v = rb_check_convert_type(argv[0], T_REGEXP, "Regexp", "to_regexp"); + if (!NIL_P(v)) + return v; + else { + VALUE args[1]; + args[0] = rb_reg_s_quote(argc, argv); + return rb_class_new_instance(1, args, rb_cRegexp); + } + } + else { + int i, kcode = -1; + VALUE kcode_re = Qnil; + VALUE source = rb_str_buf_new(0); + VALUE args[3]; + for (i = 0; i < argc; i++) { + volatile VALUE v; + if (0 < i) + rb_str_buf_cat2(source, "|"); + v = rb_check_convert_type(argv[i], T_REGEXP, "Regexp", "to_regexp"); + if (!NIL_P(v)) { + if (FL_TEST(v, KCODE_FIXED)) { + if (kcode == -1) { + kcode_re = v; + kcode = RBASIC(v)->flags & KCODE_MASK; + } + else if ((RBASIC(v)->flags & KCODE_MASK) != kcode) { + volatile VALUE str1, str2; + str1 = rb_inspect(kcode_re); + str2 = rb_inspect(v); + rb_raise(rb_eArgError, "mixed kcode: %s and %s", + RSTRING(str1)->ptr, RSTRING(str2)->ptr); + } + } + v = rb_reg_to_s(v); + } + else { + args[0] = argv[i]; + v = rb_reg_s_quote(1, args); + } + rb_str_buf_append(source, v); + } + args[0] = source; + args[1] = Qnil; + switch (kcode) { + case -1: + args[2] = Qnil; + break; + case KCODE_NONE: + args[2] = rb_str_new2("n"); + break; + case KCODE_EUC: + args[2] = rb_str_new2("e"); + break; + case KCODE_SJIS: + args[2] = rb_str_new2("s"); + break; + case KCODE_UTF8: + args[2] = rb_str_new2("u"); + break; + } + return rb_class_new_instance(3, args, rb_cRegexp); + } +} + +/* :nodoc: */ +static VALUE +rb_reg_init_copy(copy, re) + VALUE copy, re; +{ + if (copy == re) return copy; + rb_check_frozen(copy); + /* need better argument 
type check */ + if (!rb_obj_is_instance_of(re, rb_obj_class(copy))) { + rb_raise(rb_eTypeError, "wrong argument type"); + } + rb_reg_check(re); + rb_reg_initialize(copy, RREGEXP(re)->str, RREGEXP(re)->len, + rb_reg_options(re), Qfalse); + return copy; +} + +VALUE +rb_reg_regsub(str, src, regs) + VALUE str, src; + struct re_registers *regs; +{ + VALUE val = 0; + char *p, *s, *e; + unsigned char uc; + int no; + + + p = s = RSTRING(str)->ptr; + e = s + RSTRING(str)->len; + + while (s < e) { + char *ss = s; + + uc = (unsigned char)*s++; + if (ismbchar(uc)) { + s += mbclen(uc) - 1; + continue; + } + if (uc != '\\' || s == e) continue; + + if (!val) { + val = rb_str_buf_new(ss-p); + rb_str_buf_cat(val, p, ss-p); + } + else { + rb_str_buf_cat(val, p, ss-p); + } + + uc = (unsigned char)*s++; + p = s; + switch (uc) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + no = uc - '0'; + break; + case '&': + no = 0; + break; + + case '`': + rb_str_buf_cat(val, RSTRING(src)->ptr, BEG(0)); + continue; + + case '\'': + rb_str_buf_cat(val, RSTRING(src)->ptr+END(0), RSTRING(src)->len-END(0)); + continue; + + case '+': + no = regs->num_regs-1; + while (BEG(no) == -1 && no > 0) no--; + if (no == 0) continue; + break; + + case '\\': + rb_str_buf_cat(val, s-1, 1); + continue; + + default: + rb_str_buf_cat(val, s-2, 2); + continue; + } + + if (no >= 0) { + if (no >= regs->num_regs) continue; + if (BEG(no) == -1) continue; + rb_str_buf_cat(val, RSTRING(src)->ptr+BEG(no), END(no)-BEG(no)); + } + } + + if (p < e) { + if (!val) { + val = rb_str_buf_new(e-p); + rb_str_buf_cat(val, p, e-p); + } + else { + rb_str_buf_cat(val, p, e-p); + } + } + if (!val) return str; + + return val; +} + +const char* +rb_get_kcode() +{ + switch (reg_kcode) { + case KCODE_SJIS: + return "SJIS"; + case KCODE_EUC: + return "EUC"; + case KCODE_UTF8: + return "UTF8"; + default: + return "NONE"; + } +} + +static VALUE +kcode_getter() +{ + return 
rb_str_new2(rb_get_kcode()); +} + +void +rb_set_kcode(code) + const char *code; +{ + if (code == 0) goto set_no_conversion; + + switch (code[0]) { + case 'E': + case 'e': + reg_kcode = KCODE_EUC; + onigenc_set_default_encoding(ONIG_ENCODING_EUC_JP); + break; + case 'S': + case 's': + reg_kcode = KCODE_SJIS; + onigenc_set_default_encoding(ONIG_ENCODING_SJIS); + break; + case 'U': + case 'u': + reg_kcode = KCODE_UTF8; + onigenc_set_default_encoding(ONIG_ENCODING_UTF8); + break; + default: + case 'N': + case 'n': + case 'A': + case 'a': + set_no_conversion: + reg_kcode = KCODE_NONE; + onigenc_set_default_encoding(ONIG_ENCODING_ASCII); + break; + } +} + +static void +kcode_setter(val) + VALUE val; +{ + may_need_recompile = 1; + rb_set_kcode(StringValuePtr(val)); +} + +static VALUE +ignorecase_getter() +{ + return ruby_ignorecase?Qtrue:Qfalse; +} + +static void +ignorecase_setter(val, id) + VALUE val; + ID id; +{ + rb_warn("modifying %s is deprecated", rb_id2name(id)); + may_need_recompile = 1; + ruby_ignorecase = RTEST(val); +} + +static VALUE +match_getter() +{ + VALUE match = rb_backref_get(); + + if (NIL_P(match)) return Qnil; + rb_match_busy(match); + return match; +} + +static void +match_setter(val) + VALUE val; +{ + if (!NIL_P(val)) { + Check_Type(val, T_MATCH); + } + rb_backref_set(val); +} + +/* + * call-seq: + * Regexp.last_match => matchdata + * Regexp.last_match(fixnum) => str + * + * The first form returns the <code>MatchData</code> object generated by the + * last successful pattern match. Equivalent to reading the global variable + * <code>$~</code>. The second form returns the nth field in this + * <code>MatchData</code> object. 
+ * + * /c(.)t/ =~ 'cat' #=> 0 + * Regexp.last_match #=> #<MatchData:0x401b3d30> + * Regexp.last_match(0) #=> "cat" + * Regexp.last_match(1) #=> "a" + * Regexp.last_match(2) #=> nil + */ + +static VALUE +rb_reg_s_last_match(argc, argv) + int argc; + VALUE *argv; +{ + VALUE nth; + + if (rb_scan_args(argc, argv, "01", &nth) == 1) { + return rb_reg_nth_match(NUM2INT(nth), rb_backref_get()); + } + return match_getter(); +} + + +/* + * Document-class: Regexp + * + * A <code>Regexp</code> holds a regular expression, used to match a pattern + * against strings. Regexps are created using the <code>/.../</code> and + * <code>%r{...}</code> literals, and by the <code>Regexp::new</code> + * constructor. + * + */ + +void +Init_Regexp() +{ + rb_eRegexpError = rb_define_class("RegexpError", rb_eStandardError); + + onigenc_set_default_caseconv_table((UChar* )casetable); +#if DEFAULT_KCODE == KCODE_EUC + onigenc_set_default_encoding(ONIG_ENCODING_EUC_JP); +#else +#if DEFAULT_KCODE == KCODE_SJIS + onigenc_set_default_encoding(ONIG_ENCODING_SJIS); +#else +#if DEFAULT_KCODE == KCODE_UTF8 + onigenc_set_default_encoding(ONIG_ENCODING_UTF8); +#else + onigenc_set_default_encoding(ONIG_ENCODING_ASCII); +#endif +#endif +#endif + + rb_define_virtual_variable("$~", match_getter, match_setter); + rb_define_virtual_variable("$&", last_match_getter, 0); + rb_define_virtual_variable("$`", prematch_getter, 0); + rb_define_virtual_variable("$'", postmatch_getter, 0); + rb_define_virtual_variable("$+", last_paren_match_getter, 0); + + rb_define_virtual_variable("$=", ignorecase_getter, ignorecase_setter); + rb_define_virtual_variable("$KCODE", kcode_getter, kcode_setter); + rb_define_virtual_variable("$-K", kcode_getter, kcode_setter); + + rb_cRegexp = rb_define_class("Regexp", rb_cObject); + rb_define_alloc_func(rb_cRegexp, rb_reg_s_alloc); + rb_define_singleton_method(rb_cRegexp, "compile", rb_class_new_instance, -1); + rb_define_singleton_method(rb_cRegexp, "quote", rb_reg_s_quote, -1); + 
rb_define_singleton_method(rb_cRegexp, "escape", rb_reg_s_quote, -1); + rb_define_singleton_method(rb_cRegexp, "union", rb_reg_s_union, -1); + rb_define_singleton_method(rb_cRegexp, "last_match", rb_reg_s_last_match, -1); + + rb_define_method(rb_cRegexp, "initialize", rb_reg_initialize_m, -1); + rb_define_method(rb_cRegexp, "initialize_copy", rb_reg_init_copy, 1); + rb_define_method(rb_cRegexp, "hash", rb_reg_hash, 0); + rb_define_method(rb_cRegexp, "eql?", rb_reg_equal, 1); + rb_define_method(rb_cRegexp, "==", rb_reg_equal, 1); + rb_define_method(rb_cRegexp, "=~", rb_reg_match, 1); + rb_define_method(rb_cRegexp, "===", rb_reg_eqq, 1); + rb_define_method(rb_cRegexp, "~", rb_reg_match2, 0); + rb_define_method(rb_cRegexp, "match", rb_reg_match_m, -1); + rb_define_method(rb_cRegexp, "to_s", rb_reg_to_s, 0); + rb_define_method(rb_cRegexp, "inspect", rb_reg_inspect, 0); + rb_define_method(rb_cRegexp, "source", rb_reg_source, 0); + rb_define_method(rb_cRegexp, "casefold?", rb_reg_casefold_p, 0); + rb_define_method(rb_cRegexp, "options", rb_reg_options_m, 0); + rb_define_method(rb_cRegexp, "kcode", rb_reg_kcode_m, 0); + + rb_define_const(rb_cRegexp, "IGNORECASE", INT2FIX(ONIG_OPTION_IGNORECASE)); + rb_define_const(rb_cRegexp, "EXTENDED", INT2FIX(ONIG_OPTION_EXTEND)); + rb_define_const(rb_cRegexp, "MULTILINE", INT2FIX(ONIG_OPTION_MULTILINE)); + + rb_global_variable(&reg_cache); + + rb_cMatch = rb_define_class("MatchData", rb_cObject); + rb_define_global_const("MatchingData", rb_cMatch); + rb_define_alloc_func(rb_cMatch, match_alloc); + rb_undef_method(CLASS_OF(rb_cMatch), "new"); + + rb_define_method(rb_cMatch, "initialize_copy", match_init_copy, 1); + rb_define_method(rb_cMatch, "size", match_size, 0); + rb_define_method(rb_cMatch, "length", match_size, 0); + rb_define_method(rb_cMatch, "offset", match_offset, 1); + rb_define_method(rb_cMatch, "begin", match_begin, 1); + rb_define_method(rb_cMatch, "end", match_end, 1); + rb_define_method(rb_cMatch, "to_a", match_to_a, 0); +
rb_define_method(rb_cMatch, "[]", match_aref, -1); + rb_define_method(rb_cMatch, "captures", match_captures, 0); + rb_define_method(rb_cMatch, "select", match_select, -1); + rb_define_method(rb_cMatch, "values_at", match_values_at, -1); + rb_define_method(rb_cMatch, "pre_match", rb_reg_match_pre, 0); + rb_define_method(rb_cMatch, "post_match", rb_reg_match_post, 0); + rb_define_method(rb_cMatch, "to_s", match_to_s, 0); + rb_define_method(rb_cMatch, "inspect", rb_any_to_s, 0); /* in object.c */ + rb_define_method(rb_cMatch, "string", match_string, 0); +} +/********************************************************************** + regcomp.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regparse.h" + +OnigAmbigType OnigDefaultAmbigFlag = + (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | + ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE); + +extern OnigAmbigType +onig_get_default_ambig_flag() +{ + return OnigDefaultAmbigFlag; +} + +extern int +onig_set_default_ambig_flag(OnigAmbigType ambig_flag) +{ + OnigDefaultAmbigFlag = ambig_flag; + return 0; +} + + +#ifndef PLATFORM_UNALIGNED_WORD_ACCESS +static unsigned char PadBuf[WORD_ALIGNMENT_SIZE]; +#endif + +static UChar* +k_strdup(UChar* s, UChar* end) +{ + int len = end - s; + + if (len > 0) { + UChar* r = (UChar* )xmalloc(len + 1); + CHECK_NULL_RETURN(r); + xmemcpy(r, s, len); + r[len] = (UChar )0; + return r; + } + else return NULL; +} + +/* + Caution: node should not be a string node. 
+ (s and end member address break) +*/ +static void +swap_node(Node* a, Node* b) +{ + Node c; + c = *a; *a = *b; *b = c; +} + +static OnigDistance +distance_add(OnigDistance d1, OnigDistance d2) +{ + if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE) + return ONIG_INFINITE_DISTANCE; + else { + if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2; + else return ONIG_INFINITE_DISTANCE; + } +} + +static OnigDistance +distance_multiply(OnigDistance d, int m) +{ + if (m == 0) return 0; + + if (d < ONIG_INFINITE_DISTANCE / m) + return d * m; + else + return ONIG_INFINITE_DISTANCE; +} + +static int +bitset_is_empty(BitSetRef bs) +{ + int i; + for (i = 0; i < BITSET_SIZE; i++) { + if (bs[i] != 0) return 0; + } + return 1; +} + +#ifdef ONIG_DEBUG +static int +bitset_on_num(BitSetRef bs) +{ + int i, n; + + n = 0; + for (i = 0; i < SINGLE_BYTE_SIZE; i++) { + if (BITSET_AT(bs, i)) n++; + } + return n; +} +#endif + +extern int +onig_bbuf_init(BBuf* buf, int size) +{ + buf->p = (UChar* )xmalloc(size); + if (IS_NULL(buf->p)) return(ONIGERR_MEMORY); + + buf->alloc = size; + buf->used = 0; + return 0; +} + + +#ifdef USE_SUBEXP_CALL + +static int +unset_addr_list_init(UnsetAddrList* uslist, int size) +{ + UnsetAddr* p; + + p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size); + CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY); + uslist->num = 0; + uslist->alloc = size; + uslist->us = p; + return 0; +} + +static void +unset_addr_list_end(UnsetAddrList* uslist) +{ + if (IS_NOT_NULL(uslist->us)) + xfree(uslist->us); +} + +static int +unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node) +{ + UnsetAddr* p; + int size; + + if (uslist->num >= uslist->alloc) { + size = uslist->alloc * 2; + p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size); + CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY); + uslist->alloc = size; + uslist->us = p; + } + + uslist->us[uslist->num].offset = offset; + uslist->us[uslist->num].target = node; + uslist->num++; + return 0; +} 
+#endif /* USE_SUBEXP_CALL */ + + +static int +add_opcode(regex_t* reg, int opcode) +{ + BBUF_ADD1(reg, opcode); + return 0; +} + +static int +add_rel_addr(regex_t* reg, int addr) +{ + RelAddrType ra = (RelAddrType )addr; + + BBUF_ADD(reg, &ra, SIZE_RELADDR); + return 0; +} + +static int +add_abs_addr(regex_t* reg, int addr) +{ + AbsAddrType ra = (AbsAddrType )addr; + + BBUF_ADD(reg, &ra, SIZE_ABSADDR); + return 0; +} + +static int +add_length(regex_t* reg, int len) +{ + LengthType l = (LengthType )len; + + BBUF_ADD(reg, &l, SIZE_LENGTH); + return 0; +} + +static int +add_mem_num(regex_t* reg, int num) +{ + MemNumType n = (MemNumType )num; + + BBUF_ADD(reg, &n, SIZE_MEMNUM); + return 0; +} + +static int +add_pointer(regex_t* reg, void* addr) +{ + PointerType ptr = (PointerType )addr; + + BBUF_ADD(reg, &ptr, SIZE_POINTER); + return 0; +} + +static int +add_option(regex_t* reg, OnigOptionType option) +{ + BBUF_ADD(reg, &option, SIZE_OPTION); + return 0; +} + +static int +add_opcode_rel_addr(regex_t* reg, int opcode, int addr) +{ + int r; + + r = add_opcode(reg, opcode); + if (r) return r; + r = add_rel_addr(reg, addr); + return r; +} + +static int +add_bytes(regex_t* reg, UChar* bytes, int len) +{ + BBUF_ADD(reg, bytes, len); + return 0; +} + +static int +add_bitset(regex_t* reg, BitSetRef bs) +{ + BBUF_ADD(reg, bs, SIZE_BITSET); + return 0; +} + +static int +add_opcode_option(regex_t* reg, int opcode, OnigOptionType option) +{ + int r; + + r = add_opcode(reg, opcode); + if (r) return r; + r = add_option(reg, option); + return r; +} + +static int compile_length_tree(Node* node, regex_t* reg); +static int compile_tree(Node* node, regex_t* reg); + + +#define IS_NEED_STR_LEN_OP_EXACT(op) \ + ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\ + (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC) + +static int +select_str_opcode(int mb_len, int str_len, int ignore_case) +{ + int op; + + if (ignore_case) { + switch (str_len) { + case 1: op = OP_EXACT1_IC; 
break; + default: op = OP_EXACTN_IC; break; + } + } + else { + switch (mb_len) { + case 1: + switch (str_len) { + case 1: op = OP_EXACT1; break; + case 2: op = OP_EXACT2; break; + case 3: op = OP_EXACT3; break; + case 4: op = OP_EXACT4; break; + case 5: op = OP_EXACT5; break; + default: op = OP_EXACTN; break; + } + break; + + case 2: + switch (str_len) { + case 1: op = OP_EXACTMB2N1; break; + case 2: op = OP_EXACTMB2N2; break; + case 3: op = OP_EXACTMB2N3; break; + default: op = OP_EXACTMB2N; break; + } + break; + + case 3: + op = OP_EXACTMB3N; + break; + + default: + op = OP_EXACTMBN; + break; + } + } + return op; +} + +static int +compile_tree_empty_check(Node* node, regex_t* reg, int empty_info) +{ + int r; + int saved_num_null_check = reg->num_null_check; + + if (empty_info != 0) { + r = add_opcode(reg, OP_NULL_CHECK_START); + if (r) return r; + r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */ + if (r) return r; + reg->num_null_check++; + } + + r = compile_tree(node, reg); + if (r) return r; + + if (empty_info != 0) { + if (empty_info == NQ_TARGET_IS_EMPTY) + r = add_opcode(reg, OP_NULL_CHECK_END); + else if (empty_info == NQ_TARGET_IS_EMPTY_MEM) + r = add_opcode(reg, OP_NULL_CHECK_END_MEMST); + else if (empty_info == NQ_TARGET_IS_EMPTY_REC) + r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH); + + if (r) return r; + r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */ + } + return r; +} + +#ifdef USE_SUBEXP_CALL +static int +compile_call(CallNode* node, regex_t* reg) +{ + int r; + + r = add_opcode(reg, OP_CALL); + if (r) return r; + r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg), + node->target); + if (r) return r; + r = add_abs_addr(reg, 0 /*dummy addr.*/); + return r; +} +#endif + +static int +compile_tree_n_times(Node* node, int n, regex_t* reg) +{ + int i, r; + + for (i = 0; i < n; i++) { + r = compile_tree(node, reg); + if (r) return r; + } + return 0; +} + +static int +add_compile_string_length(UChar* s, 
int mb_len, int str_len, + regex_t* reg, int ignore_case) +{ + int len; + int op = select_str_opcode(mb_len, str_len, ignore_case); + + len = SIZE_OPCODE; + + if (op == OP_EXACTMBN) len += SIZE_LENGTH; + if (IS_NEED_STR_LEN_OP_EXACT(op)) + len += SIZE_LENGTH; + + len += mb_len * str_len; + return len; +} + +static int +add_compile_string(UChar* s, int mb_len, int str_len, + regex_t* reg, int ignore_case) +{ + int op = select_str_opcode(mb_len, str_len, ignore_case); + add_opcode(reg, op); + + if (op == OP_EXACTMBN) + add_length(reg, mb_len); + + if (IS_NEED_STR_LEN_OP_EXACT(op)) { + if (op == OP_EXACTN_IC) + add_length(reg, mb_len * str_len); + else + add_length(reg, str_len); + } + + add_bytes(reg, s, mb_len * str_len); + return 0; +} + + +static int +compile_length_string_node(Node* node, regex_t* reg) +{ + int rlen, r, len, prev_len, slen, ambig; + OnigEncoding enc = reg->enc; + UChar *p, *prev; + StrNode* sn; + + sn = &(NSTRING(node)); + if (sn->end <= sn->s) + return 0; + + ambig = NSTRING_IS_AMBIG(node); + + p = prev = sn->s; + prev_len = enc_len(enc, p); + p += prev_len; + slen = 1; + rlen = 0; + + for (; p < sn->end; ) { + len = enc_len(enc, p); + if (len == prev_len) { + slen++; + } + else { + r = add_compile_string_length(prev, prev_len, slen, reg, ambig); + rlen += r; + prev = p; + slen = 1; + prev_len = len; + } + p += len; + } + r = add_compile_string_length(prev, prev_len, slen, reg, ambig); + rlen += r; + return rlen; +} + +static int +compile_length_string_raw_node(StrNode* sn, regex_t* reg) +{ + if (sn->end <= sn->s) + return 0; + + return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0); +} + +static int +compile_string_node(Node* node, regex_t* reg) +{ + int r, len, prev_len, slen, ambig; + OnigEncoding enc = reg->enc; + UChar *p, *prev, *end; + StrNode* sn; + + sn = &(NSTRING(node)); + if (sn->end <= sn->s) + return 0; + + end = sn->end; + ambig = NSTRING_IS_AMBIG(node); + + p = prev = sn->s; + prev_len = enc_len(enc, p); + 
p += prev_len; + slen = 1; + + for (; p < end; ) { + len = enc_len(enc, p); + if (len == prev_len) { + slen++; + } + else { + r = add_compile_string(prev, prev_len, slen, reg, ambig); + if (r) return r; + + prev = p; + slen = 1; + prev_len = len; + } + + p += len; + } + return add_compile_string(prev, prev_len, slen, reg, ambig); +} + +static int +compile_string_raw_node(StrNode* sn, regex_t* reg) +{ + if (sn->end <= sn->s) + return 0; + + return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0); +} + +static int +add_multi_byte_cclass(BBuf* mbuf, regex_t* reg) +{ +#ifdef PLATFORM_UNALIGNED_WORD_ACCESS + add_length(reg, mbuf->used); + return add_bytes(reg, mbuf->p, mbuf->used); +#else + int r, pad_size; + UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH; + + GET_ALIGNMENT_PAD_SIZE(p, pad_size); + add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1)); + if (pad_size != 0) add_bytes(reg, PadBuf, pad_size); + + r = add_bytes(reg, mbuf->p, mbuf->used); + + /* padding for return value from compile_length_cclass_node() to be fix. 
*/ + pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size; + if (pad_size != 0) add_bytes(reg, PadBuf, pad_size); + return r; +#endif +} + +static int +compile_length_cclass_node(CClassNode* cc, regex_t* reg) +{ + int len; + + if (IS_CCLASS_SHARE(cc)) { + len = SIZE_OPCODE + SIZE_POINTER; + return len; + } + + if (IS_NULL(cc->mbuf)) { + len = SIZE_OPCODE + SIZE_BITSET; + } + else { + if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) { + len = SIZE_OPCODE; + } + else { + len = SIZE_OPCODE + SIZE_BITSET; + } +#ifdef PLATFORM_UNALIGNED_WORD_ACCESS + len += SIZE_LENGTH + cc->mbuf->used; +#else + len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1); +#endif + } + + return len; +} + +static int +compile_cclass_node(CClassNode* cc, regex_t* reg) +{ + int r; + + if (IS_CCLASS_SHARE(cc)) { + add_opcode(reg, OP_CCLASS_NODE); + r = add_pointer(reg, cc); + return r; + } + + if (IS_NULL(cc->mbuf)) { + if (IS_CCLASS_NOT(cc)) + add_opcode(reg, OP_CCLASS_NOT); + else + add_opcode(reg, OP_CCLASS); + + r = add_bitset(reg, cc->bs); + } + else { + if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) { + if (IS_CCLASS_NOT(cc)) + add_opcode(reg, OP_CCLASS_MB_NOT); + else + add_opcode(reg, OP_CCLASS_MB); + + r = add_multi_byte_cclass(cc->mbuf, reg); + } + else { + if (IS_CCLASS_NOT(cc)) + add_opcode(reg, OP_CCLASS_MIX_NOT); + else + add_opcode(reg, OP_CCLASS_MIX); + + r = add_bitset(reg, cc->bs); + if (r) return r; + r = add_multi_byte_cclass(cc->mbuf, reg); + } + } + + return r; +} + +static int +entry_repeat_range(regex_t* reg, int id, int lower, int upper) +{ +#define REPEAT_RANGE_ALLOC 4 + + OnigRepeatRange* p; + + if (reg->repeat_range_alloc == 0) { + p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC); + CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY); + reg->repeat_range = p; + reg->repeat_range_alloc = REPEAT_RANGE_ALLOC; + } + else if (reg->repeat_range_alloc <= id) { + int n; + n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC; 
+ p = (OnigRepeatRange* )xrealloc(reg->repeat_range, + sizeof(OnigRepeatRange) * n); + CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY); + reg->repeat_range = p; + reg->repeat_range_alloc = n; + } + else { + p = reg->repeat_range; + } + + p[id].lower = lower; + p[id].upper = upper; + return 0; +} + +static int +compile_range_repeat_node(QualifierNode* qn, int target_len, int empty_info, + regex_t* reg) +{ + int r; + int num_repeat = reg->num_repeat; + + r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG); + if (r) return r; + r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */ + reg->num_repeat++; + if (r) return r; + r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC); + if (r) return r; + + r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper); + if (r) return r; + + r = compile_tree_empty_check(qn->target, reg, empty_info); + if (r) return r; + + if ( +#ifdef USE_SUBEXP_CALL + reg->num_call > 0 || +#endif + IS_QUALIFIER_IN_REPEAT(qn)) { + r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG); + } + else { + r = add_opcode(reg, qn->greedy ? 
OP_REPEAT_INC : OP_REPEAT_INC_NG); + } + if (r) return r; + r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */ + return r; +} + +#define QUALIFIER_EXPAND_LIMIT_SIZE 50 + +static int +compile_length_qualifier_node(QualifierNode* qn, regex_t* reg) +{ + int len, mod_tlen; + int infinite = IS_REPEAT_INFINITE(qn->upper); + int empty_info = qn->target_empty_info; + int tlen = compile_length_tree(qn->target, reg); + + if (tlen < 0) return tlen; + + /* anychar repeat */ + if (NTYPE(qn->target) == N_ANYCHAR) { + if (qn->greedy && infinite) { + if (IS_NOT_NULL(qn->next_head_exact)) + return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower; + else + return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower; + } + } + + if (empty_info != 0) + mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); + else + mod_tlen = tlen; + + if (infinite && + (qn->lower <= 1 || tlen * qn->lower <= QUALIFIER_EXPAND_LIMIT_SIZE)) { + if (qn->lower == 1 && tlen > QUALIFIER_EXPAND_LIMIT_SIZE) { + len = SIZE_OP_JUMP; + } + else { + len = tlen * qn->lower; + } + + if (qn->greedy) { + if (IS_NOT_NULL(qn->head_exact)) + len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP; + else if (IS_NOT_NULL(qn->next_head_exact)) + len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP; + else + len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP; + } + else + len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH; + } + else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */ + len = SIZE_OP_JUMP + tlen; + } + else if (!infinite && qn->greedy && + (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper + <= QUALIFIER_EXPAND_LIMIT_SIZE)) { + len = tlen * qn->lower; + len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower); + } + else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' 
*/ + len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen; + } + else { + len = SIZE_OP_REPEAT_INC + + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM; + } + + return len; +} + +static int +is_anychar_star_qualifier(QualifierNode* qn) +{ + if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) && + NTYPE(qn->target) == N_ANYCHAR) + return 1; + else + return 0; +} + +static int +compile_qualifier_node(QualifierNode* qn, regex_t* reg) +{ + int i, r, mod_tlen; + int infinite = IS_REPEAT_INFINITE(qn->upper); + int empty_info = qn->target_empty_info; + int tlen = compile_length_tree(qn->target, reg); + + if (tlen < 0) return tlen; + + if (is_anychar_star_qualifier(qn)) { + r = compile_tree_n_times(qn->target, qn->lower, reg); + if (r) return r; + if (IS_NOT_NULL(qn->next_head_exact)) { + if (IS_MULTILINE(reg->options)) + r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT); + else + r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT); + if (r) return r; + return add_bytes(reg, NSTRING(qn->next_head_exact).s, 1); + } + else { + if (IS_MULTILINE(reg->options)) + return add_opcode(reg, OP_ANYCHAR_ML_STAR); + else + return add_opcode(reg, OP_ANYCHAR_STAR); + } + } + + if (empty_info != 0) + mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); + else + mod_tlen = tlen; + + if (infinite && + (qn->lower <= 1 || tlen * qn->lower <= QUALIFIER_EXPAND_LIMIT_SIZE)) { + if (qn->lower == 1 && tlen > QUALIFIER_EXPAND_LIMIT_SIZE) { + if (qn->greedy) { + if (IS_NOT_NULL(qn->head_exact)) + r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1); + else if (IS_NOT_NULL(qn->next_head_exact)) + r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT); + else + r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH); + } + else { + r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP); + } + if (r) return r; + } + else { + r = compile_tree_n_times(qn->target, qn->lower, reg); + if (r) return r; + } + + if (qn->greedy) { + if (IS_NOT_NULL(qn->head_exact)) { + r = 
add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1, + mod_tlen + SIZE_OP_JUMP); + if (r) return r; + add_bytes(reg, NSTRING(qn->head_exact).s, 1); + r = compile_tree_empty_check(qn->target, reg, empty_info); + if (r) return r; + r = add_opcode_rel_addr(reg, OP_JUMP, + -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1)); + } + else if (IS_NOT_NULL(qn->next_head_exact)) { + r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT, + mod_tlen + SIZE_OP_JUMP); + if (r) return r; + add_bytes(reg, NSTRING(qn->next_head_exact).s, 1); + r = compile_tree_empty_check(qn->target, reg, empty_info); + if (r) return r; + r = add_opcode_rel_addr(reg, OP_JUMP, + -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT)); + } + else { + r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP); + if (r) return r; + r = compile_tree_empty_check(qn->target, reg, empty_info); + if (r) return r; + r = add_opcode_rel_addr(reg, OP_JUMP, + -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH)); + } + } + else { + r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen); + if (r) return r; + r = compile_tree_empty_check(qn->target, reg, empty_info); + if (r) return r; + r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH)); + } + } + else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */ + r = add_opcode_rel_addr(reg, OP_JUMP, tlen); + if (r) return r; + r = compile_tree(qn->target, reg); + } + else if (!infinite && qn->greedy && + (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper + <= QUALIFIER_EXPAND_LIMIT_SIZE)) { + int n = qn->upper - qn->lower; + + r = compile_tree_n_times(qn->target, qn->lower, reg); + if (r) return r; + + for (i = 0; i < n; i++) { + r = add_opcode_rel_addr(reg, OP_PUSH, + (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH); + if (r) return r; + r = compile_tree(qn->target, reg); + if (r) return r; + } + } + else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' 
*/ + r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP); + if (r) return r; + r = add_opcode_rel_addr(reg, OP_JUMP, tlen); + if (r) return r; + r = compile_tree(qn->target, reg); + } + else { + r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg); + } + return r; +} + +static int +compile_length_option_node(EffectNode* node, regex_t* reg) +{ + int tlen; + OnigOptionType prev = reg->options; + + reg->options = node->option; + tlen = compile_length_tree(node->target, reg); + reg->options = prev; + + if (tlen < 0) return tlen; + + if (IS_DYNAMIC_OPTION(prev ^ node->option)) { + return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL + + tlen + SIZE_OP_SET_OPTION; + } + else + return tlen; +} + +static int +compile_option_node(EffectNode* node, regex_t* reg) +{ + int r; + OnigOptionType prev = reg->options; + + if (IS_DYNAMIC_OPTION(prev ^ node->option)) { + r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option); + if (r) return r; + r = add_opcode_option(reg, OP_SET_OPTION, prev); + if (r) return r; + r = add_opcode(reg, OP_FAIL); + if (r) return r; + } + + reg->options = node->option; + r = compile_tree(node->target, reg); + reg->options = prev; + + if (IS_DYNAMIC_OPTION(prev ^ node->option)) { + if (r) return r; + r = add_opcode_option(reg, OP_SET_OPTION, prev); + } + return r; +} + +static int +compile_length_effect_node(EffectNode* node, regex_t* reg) +{ + int len; + int tlen; + + if (node->type == EFFECT_OPTION) + return compile_length_option_node(node, reg); + + if (node->target) { + tlen = compile_length_tree(node->target, reg); + if (tlen < 0) return tlen; + } + else + tlen = 0; + + switch (node->type) { + case EFFECT_MEMORY: +#ifdef USE_SUBEXP_CALL + if (IS_EFFECT_CALLED(node)) { + len = SIZE_OP_MEMORY_START_PUSH + tlen + + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN; + if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) + len += (IS_EFFECT_RECURSION(node) + ? 
SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH); + else + len += (IS_EFFECT_RECURSION(node) + ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END); + } + else +#endif + { + if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum)) + len = SIZE_OP_MEMORY_START_PUSH; + else + len = SIZE_OP_MEMORY_START; + + len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum) + ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END); + } + break; + + case EFFECT_STOP_BACKTRACK: + if (IS_EFFECT_STOP_BT_SIMPLE_REPEAT(node)) { + QualifierNode* qn = &NQUALIFIER(node->target); + tlen = compile_length_tree(qn->target, reg); + if (tlen < 0) return tlen; + + len = tlen * qn->lower + + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP; + } + else { + len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT; + } + break; + + default: + return ONIGERR_TYPE_BUG; + break; + } + + return len; +} + +static int get_char_length_tree(Node* node, regex_t* reg, int* len); + +static int +compile_effect_node(EffectNode* node, regex_t* reg) +{ + int r, len; + + if (node->type == EFFECT_OPTION) + return compile_option_node(node, reg); + + switch (node->type) { + case EFFECT_MEMORY: +#ifdef USE_SUBEXP_CALL + if (IS_EFFECT_CALLED(node)) { + r = add_opcode(reg, OP_CALL); + if (r) return r; + node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP; + node->state |= NST_ADDR_FIXED; + r = add_abs_addr(reg, (int )node->call_addr); + if (r) return r; + len = compile_length_tree(node->target, reg); + len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN); + if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) + len += (IS_EFFECT_RECURSION(node) + ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH); + else + len += (IS_EFFECT_RECURSION(node) + ? 
SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END); + + r = add_opcode_rel_addr(reg, OP_JUMP, len); + if (r) return r; + } +#endif + if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum)) + r = add_opcode(reg, OP_MEMORY_START_PUSH); + else + r = add_opcode(reg, OP_MEMORY_START); + if (r) return r; + r = add_mem_num(reg, node->regnum); + if (r) return r; + r = compile_tree(node->target, reg); + if (r) return r; +#ifdef USE_SUBEXP_CALL + if (IS_EFFECT_CALLED(node)) { + if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) + r = add_opcode(reg, (IS_EFFECT_RECURSION(node) + ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH)); + else + r = add_opcode(reg, (IS_EFFECT_RECURSION(node) + ? OP_MEMORY_END_REC : OP_MEMORY_END)); + + if (r) return r; + r = add_mem_num(reg, node->regnum); + if (r) return r; + r = add_opcode(reg, OP_RETURN); + } + else +#endif + { + if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) + r = add_opcode(reg, OP_MEMORY_END_PUSH); + else + r = add_opcode(reg, OP_MEMORY_END); + if (r) return r; + r = add_mem_num(reg, node->regnum); + } + break; + + case EFFECT_STOP_BACKTRACK: + if (IS_EFFECT_STOP_BT_SIMPLE_REPEAT(node)) { + QualifierNode* qn = &NQUALIFIER(node->target); + r = compile_tree_n_times(qn->target, qn->lower, reg); + if (r) return r; + + len = compile_length_tree(qn->target, reg); + if (len < 0) return len; + + r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP); + if (r) return r; + r = compile_tree(qn->target, reg); + if (r) return r; + r = add_opcode(reg, OP_POP); + if (r) return r; + r = add_opcode_rel_addr(reg, OP_JUMP, + -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP)); + } + else { + r = add_opcode(reg, OP_PUSH_STOP_BT); + if (r) return r; + r = compile_tree(node->target, reg); + if (r) return r; + r = add_opcode(reg, OP_POP_STOP_BT); + } + break; + + default: + return ONIGERR_TYPE_BUG; + break; + } + + return r; +} + +static int +compile_length_anchor_node(AnchorNode* node, regex_t* reg) +{ + int len; + int tlen 
= 0; + + if (node->target) { + tlen = compile_length_tree(node->target, reg); + if (tlen < 0) return tlen; + } + + switch (node->type) { + case ANCHOR_PREC_READ: + len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS; + break; + case ANCHOR_PREC_READ_NOT: + len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS; + break; + case ANCHOR_LOOK_BEHIND: + len = SIZE_OP_LOOK_BEHIND + tlen; + break; + case ANCHOR_LOOK_BEHIND_NOT: + len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT; + break; + + default: + len = SIZE_OPCODE; + break; + } + + return len; +} + +static int +compile_anchor_node(AnchorNode* node, regex_t* reg) +{ + int r, len; + + switch (node->type) { + case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break; + case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break; + case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break; + case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break; + case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break; + case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break; + + case ANCHOR_WORD_BOUND: r = add_opcode(reg, OP_WORD_BOUND); break; + case ANCHOR_NOT_WORD_BOUND: r = add_opcode(reg, OP_NOT_WORD_BOUND); break; +#ifdef USE_WORD_BEGIN_END + case ANCHOR_WORD_BEGIN: r = add_opcode(reg, OP_WORD_BEGIN); break; + case ANCHOR_WORD_END: r = add_opcode(reg, OP_WORD_END); break; +#endif + + case ANCHOR_PREC_READ: + r = add_opcode(reg, OP_PUSH_POS); + if (r) return r; + r = compile_tree(node->target, reg); + if (r) return r; + r = add_opcode(reg, OP_POP_POS); + break; + + case ANCHOR_PREC_READ_NOT: + len = compile_length_tree(node->target, reg); + if (len < 0) return len; + r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS); + if (r) return r; + r = compile_tree(node->target, reg); + if (r) return r; + r = add_opcode(reg, OP_FAIL_POS); + break; + + case ANCHOR_LOOK_BEHIND: + { + int n; + r = add_opcode(reg, OP_LOOK_BEHIND); + if (r) return r; + 
if (node->char_len < 0) { + r = get_char_length_tree(node->target, reg, &n); + if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; + } + else + n = node->char_len; + r = add_length(reg, n); + if (r) return r; + r = compile_tree(node->target, reg); + } + break; + + case ANCHOR_LOOK_BEHIND_NOT: + { + int n; + len = compile_length_tree(node->target, reg); + r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT, + len + SIZE_OP_FAIL_LOOK_BEHIND_NOT); + if (r) return r; + if (node->char_len < 0) { + r = get_char_length_tree(node->target, reg, &n); + if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; + } + else + n = node->char_len; + r = add_length(reg, n); + if (r) return r; + r = compile_tree(node->target, reg); + if (r) return r; + r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT); + } + break; + + default: + return ONIGERR_TYPE_BUG; + break; + } + + return r; +} + +static int +compile_length_tree(Node* node, regex_t* reg) +{ + int len, type, r; + + type = NTYPE(node); + switch (type) { + case N_LIST: + len = 0; + do { + r = compile_length_tree(NCONS(node).left, reg); + if (r < 0) return r; + len += r; + } while (IS_NOT_NULL(node = NCONS(node).right)); + r = len; + break; + + case N_ALT: + { + int n; + + n = r = 0; + do { + r += compile_length_tree(NCONS(node).left, reg); + n++; + } while (IS_NOT_NULL(node = NCONS(node).right)); + r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1); + } + break; + + case N_STRING: + if (NSTRING_IS_RAW(node)) + r = compile_length_string_raw_node(&(NSTRING(node)), reg); + else + r = compile_length_string_node(node, reg); + break; + + case N_CCLASS: + r = compile_length_cclass_node(&(NCCLASS(node)), reg); + break; + + case N_CTYPE: + case N_ANYCHAR: + r = SIZE_OPCODE; + break; + + case N_BACKREF: + { + BackrefNode* br = &(NBACKREF(node)); + + if (br->back_num == 1) { + r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 3) + ? 
SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM)); + } + else { + r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num); + } + } + break; + +#ifdef USE_SUBEXP_CALL + case N_CALL: + r = SIZE_OP_CALL; + break; +#endif + + case N_QUALIFIER: + r = compile_length_qualifier_node(&(NQUALIFIER(node)), reg); + break; + + case N_EFFECT: + r = compile_length_effect_node(&NEFFECT(node), reg); + break; + + case N_ANCHOR: + r = compile_length_anchor_node(&(NANCHOR(node)), reg); + break; + + default: + return ONIGERR_TYPE_BUG; + break; + } + + return r; +} + +static int +compile_tree(Node* node, regex_t* reg) +{ + int n, type, len, pos, r = 0; + + type = NTYPE(node); + switch (type) { + case N_LIST: + do { + r = compile_tree(NCONS(node).left, reg); + } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + break; + + case N_ALT: + { + Node* x = node; + len = 0; + do { + len += compile_length_tree(NCONS(x).left, reg); + if (NCONS(x).right != NULL) { + len += SIZE_OP_PUSH + SIZE_OP_JUMP; + } + } while (IS_NOT_NULL(x = NCONS(x).right)); + pos = reg->used + len; /* goal position */ + + do { + len = compile_length_tree(NCONS(node).left, reg); + if (IS_NOT_NULL(NCONS(node).right)) { + r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP); + if (r) break; + } + r = compile_tree(NCONS(node).left, reg); + if (r) break; + if (IS_NOT_NULL(NCONS(node).right)) { + len = pos - (reg->used + SIZE_OP_JUMP); + r = add_opcode_rel_addr(reg, OP_JUMP, len); + if (r) break; + } + } while (IS_NOT_NULL(node = NCONS(node).right)); + } + break; + + case N_STRING: + if (NSTRING_IS_RAW(node)) + r = compile_string_raw_node(&(NSTRING(node)), reg); + else + r = compile_string_node(node, reg); + break; + + case N_CCLASS: + r = compile_cclass_node(&(NCCLASS(node)), reg); + break; + + case N_CTYPE: + { + int op; + + switch (NCTYPE(node).type) { + case CTYPE_WORD: op = OP_WORD; break; + case CTYPE_NOT_WORD: op = OP_NOT_WORD; break; + default: + return ONIGERR_TYPE_BUG; + break; + } + r = add_opcode(reg, 
op); + } + break; + + case N_ANYCHAR: + if (IS_MULTILINE(reg->options)) + r = add_opcode(reg, OP_ANYCHAR_ML); + else + r = add_opcode(reg, OP_ANYCHAR); + break; + + case N_BACKREF: + { + int i; + BackrefNode* br = &(NBACKREF(node)); + + if (br->back_num == 1) { + n = br->back_static[0]; + if (IS_IGNORECASE(reg->options)) { + r = add_opcode(reg, OP_BACKREFN_IC); + if (r) return r; + r = add_mem_num(reg, n); + } + else { + switch (n) { + case 1: r = add_opcode(reg, OP_BACKREF1); break; + case 2: r = add_opcode(reg, OP_BACKREF2); break; + case 3: r = add_opcode(reg, OP_BACKREF3); break; + default: + r = add_opcode(reg, OP_BACKREFN); + if (r) return r; + r = add_mem_num(reg, n); + break; + } + } + } + else { + int* p; + + if (IS_IGNORECASE(reg->options)) { + add_opcode(reg, OP_BACKREF_MULTI_IC); + } + else { + add_opcode(reg, OP_BACKREF_MULTI); + } + + if (r) return r; + add_length(reg, br->back_num); + if (r) return r; + p = BACKREFS_P(br); + for (i = br->back_num - 1; i >= 0; i--) { + r = add_mem_num(reg, p[i]); + if (r) return r; + } + } + } + break; + +#ifdef USE_SUBEXP_CALL + case N_CALL: + r = compile_call(&(NCALL(node)), reg); + break; +#endif + + case N_QUALIFIER: + r = compile_qualifier_node(&(NQUALIFIER(node)), reg); + break; + + case N_EFFECT: + r = compile_effect_node(&NEFFECT(node), reg); + break; + + case N_ANCHOR: + r = compile_anchor_node(&(NANCHOR(node)), reg); + break; + + default: +#ifdef ONIG_DEBUG + fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node)); +#endif + break; + } + + return r; +} + +#ifdef USE_NAMED_GROUP + +static int +noname_disable_map(Node** plink, GroupNumRemap* map, int* counter) +{ + int r = 0; + Node* node = *plink; + + switch (NTYPE(node)) { + case N_LIST: + case N_ALT: + do { + r = noname_disable_map(&(NCONS(node).left), map, counter); + } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + break; + + case N_QUALIFIER: + { + Node** ptarget = &(NQUALIFIER(node).target); + Node* old = *ptarget; + r = 
noname_disable_map(ptarget, map, counter); + if (*ptarget != old && NTYPE(*ptarget) == N_QUALIFIER) { + onig_reduce_nested_qualifier(node, *ptarget); + } + } + break; + + case N_EFFECT: + { + EffectNode* en = &(NEFFECT(node)); + if (en->type == EFFECT_MEMORY) { + if (IS_EFFECT_NAMED_GROUP(en)) { + (*counter)++; + map[en->regnum].new_val = *counter; + en->regnum = *counter; + r = noname_disable_map(&(en->target), map, counter); + } + else { + *plink = en->target; + en->target = NULL_NODE; + onig_node_free(node); + r = noname_disable_map(plink, map, counter); + } + } + else + r = noname_disable_map(&(en->target), map, counter); + } + break; + + default: + break; + } + + return r; +} + +static int +renumber_node_backref(Node* node, GroupNumRemap* map) +{ + int i, pos, n, old_num; + int *backs; + BackrefNode* bn = &(NBACKREF(node)); + + if (! IS_BACKREF_NAME_REF(bn)) + return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; + + old_num = bn->back_num; + if (IS_NULL(bn->back_dynamic)) + backs = bn->back_static; + else + backs = bn->back_dynamic; + + for (i = 0, pos = 0; i < old_num; i++) { + n = map[backs[i]].new_val; + if (n > 0) { + backs[pos] = n; + pos++; + } + } + + bn->back_num = pos; + return 0; +} + +static int +renumber_by_map(Node* node, GroupNumRemap* map) +{ + int r = 0; + + switch (NTYPE(node)) { + case N_LIST: + case N_ALT: + do { + r = renumber_by_map(NCONS(node).left, map); + } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + break; + case N_QUALIFIER: + r = renumber_by_map(NQUALIFIER(node).target, map); + break; + case N_EFFECT: + r = renumber_by_map(NEFFECT(node).target, map); + break; + + case N_BACKREF: + r = renumber_node_backref(node, map); + break; + + default: + break; + } + + return r; +} + +static int +numbered_ref_check(Node* node) +{ + int r = 0; + + switch (NTYPE(node)) { + case N_LIST: + case N_ALT: + do { + r = numbered_ref_check(NCONS(node).left); + } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + break; + case 
N_QUALIFIER: + r = numbered_ref_check(NQUALIFIER(node).target); + break; + case N_EFFECT: + r = numbered_ref_check(NEFFECT(node).target); + break; + + case N_BACKREF: + if (! IS_BACKREF_NAME_REF(&(NBACKREF(node)))) + return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; + break; + + default: + break; + } + + return r; +} + +static int +disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env) +{ + int r, i, pos, counter; + BitStatusType loc; + GroupNumRemap* map; + + map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1)); + CHECK_NULL_RETURN_VAL(map, ONIGERR_MEMORY); + for (i = 1; i <= env->num_mem; i++) { + map[i].new_val = 0; + } + counter = 0; + r = noname_disable_map(root, map, &counter); + if (r != 0) return r; + + r = renumber_by_map(*root, map); + if (r != 0) return r; + + for (i = 1, pos = 1; i <= env->num_mem; i++) { + if (map[i].new_val > 0) { + SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i]; + pos++; + } + } + + loc = env->capture_history; + BIT_STATUS_CLEAR(env->capture_history); + for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) { + if (BIT_STATUS_AT(loc, i)) { + BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val); + } + } + + env->num_mem = env->num_named; + reg->num_mem = env->num_named; + + return onig_renumber_name_table(reg, map); +} +#endif /* USE_NAMED_GROUP */ + +#ifdef USE_SUBEXP_CALL +static int +unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg) +{ + int i, offset; + EffectNode* en; + AbsAddrType addr; + + for (i = 0; i < uslist->num; i++) { + en = &(NEFFECT(uslist->us[i].target)); + if (! 
IS_EFFECT_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG; + addr = en->call_addr; + offset = uslist->us[i].offset; + + BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR); + } + return 0; +} +#endif + +#ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK +static int +qualifiers_memory_node_info(Node* node) +{ + int r = 0; + + switch (NTYPE(node)) { + case N_LIST: + case N_ALT: + { + int v; + do { + v = qualifiers_memory_node_info(NCONS(node).left); + if (v > r) r = v; + } while (v >= 0 && IS_NOT_NULL(node = NCONS(node).right)); + } + break; + +#ifdef USE_SUBEXP_CALL + case N_CALL: + if (IS_CALL_RECURSION(&NCALL(node))) { + return NQ_TARGET_IS_EMPTY_REC; /* tiny version */ + } + else + r = qualifiers_memory_node_info(NCALL(node).target); + break; +#endif + + case N_QUALIFIER: + { + QualifierNode* qn = &(NQUALIFIER(node)); + if (qn->upper != 0) { + r = qualifiers_memory_node_info(qn->target); + } + } + break; + + case N_EFFECT: + { + EffectNode* en = &(NEFFECT(node)); + switch (en->type) { + case EFFECT_MEMORY: + return NQ_TARGET_IS_EMPTY_MEM; + break; + + case EFFECT_OPTION: + case EFFECT_STOP_BACKTRACK: + r = qualifiers_memory_node_info(en->target); + break; + default: + break; + } + } + break; + + case N_BACKREF: + case N_STRING: + case N_CTYPE: + case N_CCLASS: + case N_ANYCHAR: + case N_ANCHOR: + default: + break; + } + + return r; +} +#endif /* USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK */ + +static int +get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env) +{ + OnigDistance tmin; + int r = 0; + + *min = 0; + switch (NTYPE(node)) { + case N_BACKREF: + { + int i; + int* backs; + Node** nodes = SCANENV_MEM_NODES(env); + BackrefNode* br = &(NBACKREF(node)); + if (br->state & NST_RECURSION) break; + + backs = BACKREFS_P(br); + if (backs[0] > env->num_mem) return ONIGERR_INVALID_BACKREF; + r = get_min_match_length(nodes[backs[0]], min, env); + if (r != 0) break; + for (i = 1; i < br->back_num; i++) { + if (backs[i] > env->num_mem) return 
ONIGERR_INVALID_BACKREF; + r = get_min_match_length(nodes[backs[i]], &tmin, env); + if (r != 0) break; + if (*min > tmin) *min = tmin; + } + } + break; + +#ifdef USE_SUBEXP_CALL + case N_CALL: + if (IS_CALL_RECURSION(&NCALL(node))) { + EffectNode* en = &(NEFFECT(NCALL(node).target)); + if (IS_EFFECT_MIN_FIXED(en)) + *min = en->min_len; + } + else + r = get_min_match_length(NCALL(node).target, min, env); + break; +#endif + + case N_LIST: + do { + r = get_min_match_length(NCONS(node).left, &tmin, env); + if (r == 0) *min += tmin; + } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + break; + + case N_ALT: + { + Node *x, *y; + y = node; + do { + x = NCONS(y).left; + r = get_min_match_length(x, &tmin, env); + if (r != 0) break; + if (y == node) *min = tmin; + else if (*min > tmin) *min = tmin; + } while (r == 0 && IS_NOT_NULL(y = NCONS(y).right)); + } + break; + + case N_STRING: + { + StrNode* sn = &(NSTRING(node)); + *min = sn->end - sn->s; + } + break; + + case N_CTYPE: + switch (NCTYPE(node).type) { + case CTYPE_WORD: *min = 1; break; + case CTYPE_NOT_WORD: *min = 1; break; + default: + break; + } + break; + + case N_CCLASS: + case N_ANYCHAR: + *min = 1; + break; + + case N_QUALIFIER: + { + QualifierNode* qn = &(NQUALIFIER(node)); + + if (qn->lower > 0) { + r = get_min_match_length(qn->target, min, env); + if (r == 0) + *min = distance_multiply(*min, qn->lower); + } + } + break; + + case N_EFFECT: + { + EffectNode* en = &(NEFFECT(node)); + switch (en->type) { + case EFFECT_MEMORY: +#ifdef USE_SUBEXP_CALL + if (IS_EFFECT_MIN_FIXED(en)) + *min = en->min_len; + else { + r = get_min_match_length(en->target, min, env); + if (r == 0) { + en->min_len = *min; + SET_EFFECT_STATUS(node, NST_MIN_FIXED); + } + } + break; +#endif + case EFFECT_OPTION: + case EFFECT_STOP_BACKTRACK: + r = get_min_match_length(en->target, min, env); + break; + } + } + break; + + case N_ANCHOR: + default: + break; + } + + return r; +} + +static int +get_max_match_length(Node* node, 
OnigDistance *max, ScanEnv* env) +{ + OnigDistance tmax; + int r = 0; + + *max = 0; + switch (NTYPE(node)) { + case N_LIST: + do { + r = get_max_match_length(NCONS(node).left, &tmax, env); + if (r == 0) + *max = distance_add(*max, tmax); + } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + break; + + case N_ALT: + do { + r = get_max_match_length(NCONS(node).left, &tmax, env); + if (r == 0 && *max < tmax) *max = tmax; + } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + break; + + case N_STRING: + { + StrNode* sn = &(NSTRING(node)); + *max = sn->end - sn->s; + } + break; + + case N_CTYPE: + switch (NCTYPE(node).type) { + case CTYPE_WORD: + case CTYPE_NOT_WORD: + *max = ONIGENC_MBC_MAXLEN_DIST(env->enc); + break; + + default: + break; + } + break; + + case N_CCLASS: + case N_ANYCHAR: + *max = ONIGENC_MBC_MAXLEN_DIST(env->enc); + break; + + case N_BACKREF: + { + int i; + int* backs; + Node** nodes = SCANENV_MEM_NODES(env); + BackrefNode* br = &(NBACKREF(node)); + if (br->state & NST_RECURSION) { + *max = ONIG_INFINITE_DISTANCE; + break; + } + backs = BACKREFS_P(br); + for (i = 0; i < br->back_num; i++) { + if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; + r = get_max_match_length(nodes[backs[i]], &tmax, env); + if (r != 0) break; + if (*max < tmax) *max = tmax; + } + } + break; + +#ifdef USE_SUBEXP_CALL + case N_CALL: + if (! IS_CALL_RECURSION(&(NCALL(node)))) + r = get_max_match_length(NCALL(node).target, max, env); + else + *max = ONIG_INFINITE_DISTANCE; + break; +#endif + + case N_QUALIFIER: + { + QualifierNode* qn = &(NQUALIFIER(node)); + + if (qn->upper != 0) { + r = get_max_match_length(qn->target, max, env); + if (r == 0 && *max != 0) { + if (! 
IS_REPEAT_INFINITE(qn->upper)) + *max = distance_multiply(*max, qn->upper); + else + *max = ONIG_INFINITE_DISTANCE; + } + } + } + break; + + case N_EFFECT: + { + EffectNode* en = &(NEFFECT(node)); + switch (en->type) { + case EFFECT_MEMORY: +#ifdef USE_SUBEXP_CALL + if (IS_EFFECT_MAX_FIXED(en)) + *max = en->max_len; + else { + r = get_max_match_length(en->target, max, env); + if (r == 0) { + en->max_len = *max; + SET_EFFECT_STATUS(node, NST_MAX_FIXED); + } + } + break; +#endif + case EFFECT_OPTION: + case EFFECT_STOP_BACKTRACK: + r = get_max_match_length(en->target, max, env); + break; + } + } + break; + + case N_ANCHOR: + default: + break; + } + + return r; +} + +#define GET_CHAR_LEN_VARLEN -1 +#define GET_CHAR_LEN_TOP_ALT_VARLEN -2 + +/* fixed size pattern node only */ +static int +get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) +{ + int tlen; + int r = 0; + + level++; + *len = 0; + switch (NTYPE(node)) { + case N_LIST: + do { + r = get_char_length_tree1(NCONS(node).left, reg, &tlen, level); + if (r == 0) + *len = distance_add(*len, tlen); + } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + break; + + case N_ALT: + { + int tlen2; + int varlen = 0; + + r = get_char_length_tree1(NCONS(node).left, reg, &tlen, level); + while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)) { + r = get_char_length_tree1(NCONS(node).left, reg, &tlen2, level); + if (r == 0) { + if (tlen != tlen2) + varlen = 1; + } + } + if (r == 0) { + if (varlen != 0) { + if (level == 1) + r = GET_CHAR_LEN_TOP_ALT_VARLEN; + else + r = GET_CHAR_LEN_VARLEN; + } + else + *len = tlen; + } + } + break; + + case N_STRING: + { + StrNode* sn = &(NSTRING(node)); + UChar *s = sn->s; + while (s < sn->end) { + s += enc_len(reg->enc, s); + (*len)++; + } + } + break; + + case N_QUALIFIER: + { + QualifierNode* qn = &(NQUALIFIER(node)); + if (qn->lower == qn->upper) { + r = get_char_length_tree1(qn->target, reg, &tlen, level); + if (r == 0) + *len = distance_multiply(tlen, qn->lower); 
+ } + else + r = GET_CHAR_LEN_VARLEN; + } + break; + +#ifdef USE_SUBEXP_CALL + case N_CALL: + if (! IS_CALL_RECURSION(&(NCALL(node)))) + r = get_char_length_tree1(NCALL(node).target, reg, len, level); + else + r = GET_CHAR_LEN_VARLEN; + break; +#endif + + case N_CTYPE: + switch (NCTYPE(node).type) { + case CTYPE_WORD: + case CTYPE_NOT_WORD: + *len = 1; + break; + } + break; + + case N_CCLASS: + case N_ANYCHAR: + *len = 1; + break; + + case N_EFFECT: + { + EffectNode* en = &(NEFFECT(node)); + switch (en->type) { + case EFFECT_MEMORY: +#ifdef USE_SUBEXP_CALL + if (IS_EFFECT_CLEN_FIXED(en)) + *len = en->char_len; + else { + r = get_char_length_tree1(en->target, reg, len, level); + if (r == 0) { + en->char_len = *len; + SET_EFFECT_STATUS(node, NST_CLEN_FIXED); + } + } + break; +#endif + case EFFECT_OPTION: + case EFFECT_STOP_BACKTRACK: + r = get_char_length_tree1(en->target, reg, len, level); + break; + default: + break; + } + } + break; + + case N_ANCHOR: + break; + + default: + r = GET_CHAR_LEN_VARLEN; + break; + } + + return r; +} + +static int +get_char_length_tree(Node* node, regex_t* reg, int* len) +{ + return get_char_length_tree1(node, reg, len, 0); +} + +extern int +onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc) +{ + int found; + + if (ONIGENC_MBC_MINLEN(enc) > 1 || (code >= SINGLE_BYTE_SIZE)) { + if (IS_NULL(cc->mbuf)) { + found = 0; + } + else { + found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0); + } + } + else { + found = (BITSET_AT(cc->bs, code) == 0 ? 
0 : 1); + } + + if (IS_CCLASS_NOT(cc)) + return !found; + else + return found; +} + +/* x is not included y ==> 1 : 0 */ +static int +is_not_included(Node* x, Node* y, regex_t* reg) +{ + int i, len; + OnigCodePoint code; + UChar *p, c; + int ytype; + + retry: + ytype = NTYPE(y); + switch (NTYPE(x)) { + case N_CTYPE: + { + switch (ytype) { + case N_CTYPE: + switch (NCTYPE(x).type) { + case CTYPE_WORD: + if (NCTYPE(y).type == CTYPE_NOT_WORD) + return 1; + else + return 0; + break; + case CTYPE_NOT_WORD: + if (NCTYPE(y).type == CTYPE_WORD) + return 1; + else + return 0; + break; + default: + break; + } + break; + + case N_CCLASS: + swap: + { + Node* tmp; + tmp = x; x = y; y = tmp; + goto retry; + } + break; + + case N_STRING: + goto swap; + break; + + default: + break; + } + } + break; + + case N_CCLASS: + { + CClassNode* xc = &(NCCLASS(x)); + switch (ytype) { + case N_CTYPE: + switch (NCTYPE(y).type) { + case CTYPE_WORD: + if (IS_NULL(xc->mbuf) && !IS_CCLASS_NOT(xc)) { + for (i = 0; i < SINGLE_BYTE_SIZE; i++) { + if (BITSET_AT(xc->bs, i)) { + if (ONIGENC_IS_CODE_SB_WORD(reg->enc, i)) return 0; + } + } + return 1; + } + return 0; + break; + case CTYPE_NOT_WORD: + for (i = 0; i < SINGLE_BYTE_SIZE; i++) { + if (! ONIGENC_IS_CODE_SB_WORD(reg->enc, i)) { + if (!IS_CCLASS_NOT(xc)) { + if (BITSET_AT(xc->bs, i)) + return 0; + } + else { + if (! 
BITSET_AT(xc->bs, i)) + return 0; + } + } + } + return 1; + break; + + default: + break; + } + break; + + case N_CCLASS: + { + int v; + CClassNode* yc = &(NCCLASS(y)); + + for (i = 0; i < SINGLE_BYTE_SIZE; i++) { + v = BITSET_AT(xc->bs, i); + if ((v != 0 && !IS_CCLASS_NOT(xc)) || + (v == 0 && IS_CCLASS_NOT(xc))) { + v = BITSET_AT(yc->bs, i); + if ((v != 0 && !IS_CCLASS_NOT(yc)) || + (v == 0 && IS_CCLASS_NOT(yc))) + return 0; + } + } + if ((IS_NULL(xc->mbuf) && !IS_CCLASS_NOT(xc)) || + (IS_NULL(yc->mbuf) && !IS_CCLASS_NOT(yc))) + return 1; + return 0; + } + break; + + case N_STRING: + goto swap; + break; + + default: + break; + } + } + break; + + case N_STRING: + { + StrNode* xs = &(NSTRING(x)); + if (NSTRING_LEN(x) == 0) + break; + + c = *(xs->s); + switch (ytype) { + case N_CTYPE: + switch (NCTYPE(y).type) { + case CTYPE_WORD: + return (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end) ? 0 : 1); + break; + case CTYPE_NOT_WORD: + return (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end) ? 1 : 0); + break; + default: + break; + } + break; + + case N_CCLASS: + { + CClassNode* cc = &(NCCLASS(y)); + + code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s, + xs->s + ONIGENC_MBC_MAXLEN(reg->enc)); + return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 
0 : 1); + } + break; + + case N_STRING: + { + UChar *q; + StrNode* ys = &(NSTRING(y)); + len = NSTRING_LEN(x); + if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y); + if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) { + /* tiny version */ + return 0; + } + else { + for (i = 0, p = ys->s, q = xs->s; i < len; i++, p++, q++) { + if (*p != *q) return 1; + } + } + } + break; + + default: + break; + } + } + break; + + default: + break; + } + + return 0; +} + +static Node* +get_head_value_node(Node* node, int exact, regex_t* reg) +{ + Node* n = NULL_NODE; + + switch (NTYPE(node)) { + case N_BACKREF: + case N_ALT: + case N_ANYCHAR: +#ifdef USE_SUBEXP_CALL + case N_CALL: +#endif + break; + + case N_CTYPE: + case N_CCLASS: + if (exact == 0) { + n = node; + } + break; + + case N_LIST: + n = get_head_value_node(NCONS(node).left, exact, reg); + break; + + case N_STRING: + { + StrNode* sn = &(NSTRING(node)); + + if (sn->end <= sn->s) + break; + + if (exact != 0 && + !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) { +#if 0 + UChar* tmp = sn->s; + if (! 
ONIGENC_IS_MBC_AMBIGUOUS(reg->enc, reg->ambig_flag, + &tmp, sn->end)) + n = node; +#endif + } + else { + n = node; + } + } + break; + + case N_QUALIFIER: + { + QualifierNode* qn = &(NQUALIFIER(node)); + if (qn->lower > 0) { + if (IS_NOT_NULL(qn->head_exact)) + n = qn->head_exact; + else + n = get_head_value_node(qn->target, exact, reg); + } + } + break; + + case N_EFFECT: + { + EffectNode* en = &(NEFFECT(node)); + switch (en->type) { + case EFFECT_OPTION: + { + OnigOptionType options = reg->options; + + reg->options = NEFFECT(node).option; + n = get_head_value_node(NEFFECT(node).target, exact, reg); + reg->options = options; + } + break; + + case EFFECT_MEMORY: + case EFFECT_STOP_BACKTRACK: + n = get_head_value_node(en->target, exact, reg); + break; + } + } + break; + + case N_ANCHOR: + if (NANCHOR(node).type == ANCHOR_PREC_READ) + n = get_head_value_node(NANCHOR(node).target, exact, reg); + break; + + default: + break; + } + + return n; +} + +static int +check_type_tree(Node* node, int type_mask, int effect_mask, int anchor_mask) +{ + int type, r = 0; + + type = NTYPE(node); + if ((type & type_mask) == 0) + return 1; + + switch (type) { + case N_LIST: + case N_ALT: + do { + r = check_type_tree(NCONS(node).left, type_mask, effect_mask, anchor_mask); + } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + break; + + case N_QUALIFIER: + r = check_type_tree(NQUALIFIER(node).target, type_mask, effect_mask, + anchor_mask); + break; + + case N_EFFECT: + { + EffectNode* en = &(NEFFECT(node)); + if ((en->type & effect_mask) == 0) + return 1; + + r = check_type_tree(en->target, type_mask, effect_mask, anchor_mask); + } + break; + + case N_ANCHOR: + type = NANCHOR(node).type; + if ((type & anchor_mask) == 0) + return 1; + + if (NANCHOR(node).target) + r = check_type_tree(NANCHOR(node).target, + type_mask, effect_mask, anchor_mask); + break; + + default: + break; + } + return r; +} + +#ifdef USE_SUBEXP_CALL + +#define RECURSION_EXIST 1 +#define RECURSION_INFINITE 2 + 
+static int +subexp_inf_recursive_check(Node* node, ScanEnv* env, int head) +{ + int type; + int r = 0; + + type = NTYPE(node); + switch (type) { + case N_LIST: + { + Node *x; + OnigDistance min; + int ret; + + x = node; + do { + ret = subexp_inf_recursive_check(NCONS(x).left, env, head); + if (ret < 0 || ret == RECURSION_INFINITE) return ret; + r |= ret; + if (head) { + ret = get_min_match_length(NCONS(x).left, &min, env); + if (ret != 0) return ret; + if (min != 0) head = 0; + } + } while (IS_NOT_NULL(x = NCONS(x).right)); + } + break; + + case N_ALT: + { + int ret; + r = RECURSION_EXIST; + do { + ret = subexp_inf_recursive_check(NCONS(node).left, env, head); + if (ret < 0 || ret == RECURSION_INFINITE) return ret; + r &= ret; + } while (IS_NOT_NULL(node = NCONS(node).right)); + } + break; + + case N_QUALIFIER: + r = subexp_inf_recursive_check(NQUALIFIER(node).target, env, head); + break; + + case N_ANCHOR: + { + AnchorNode* an = &(NANCHOR(node)); + switch (an->type) { + case ANCHOR_PREC_READ: + case ANCHOR_PREC_READ_NOT: + case ANCHOR_LOOK_BEHIND: + case ANCHOR_LOOK_BEHIND_NOT: + r = subexp_inf_recursive_check(an->target, env, head); + break; + } + } + break; + + case N_CALL: + r = subexp_inf_recursive_check(NCALL(node).target, env, head); + break; + + case N_EFFECT: + if (IS_EFFECT_MARK2(&(NEFFECT(node)))) + return 0; + else if (IS_EFFECT_MARK1(&(NEFFECT(node)))) + return (head == 0 ? 
RECURSION_EXIST : RECURSION_INFINITE); + else { + SET_EFFECT_STATUS(node, NST_MARK2); + r = subexp_inf_recursive_check(NEFFECT(node).target, env, head); + CLEAR_EFFECT_STATUS(node, NST_MARK2); + } + break; + + default: + break; + } + + return r; +} + +static int +subexp_inf_recursive_check_trav(Node* node, ScanEnv* env) +{ + int type; + int r = 0; + + type = NTYPE(node); + switch (type) { + case N_LIST: + case N_ALT: + do { + r = subexp_inf_recursive_check_trav(NCONS(node).left, env); + } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + break; + + case N_QUALIFIER: + r = subexp_inf_recursive_check_trav(NQUALIFIER(node).target, env); + break; + + case N_ANCHOR: + { + AnchorNode* an = &(NANCHOR(node)); + switch (an->type) { + case ANCHOR_PREC_READ: + case ANCHOR_PREC_READ_NOT: + case ANCHOR_LOOK_BEHIND: + case ANCHOR_LOOK_BEHIND_NOT: + r = subexp_inf_recursive_check_trav(an->target, env); + break; + } + } + break; + + case N_EFFECT: + { + EffectNode* en = &(NEFFECT(node)); + + if (IS_EFFECT_RECURSION(en)) { + SET_EFFECT_STATUS(node, NST_MARK1); + r = subexp_inf_recursive_check(en->target, env, 1); + if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION; + CLEAR_EFFECT_STATUS(node, NST_MARK1); + } + r = subexp_inf_recursive_check_trav(en->target, env); + } + + break; + + default: + break; + } + + return r; +} + +static int +subexp_recursive_check(Node* node) +{ + int type; + int r = 0; + + type = NTYPE(node); + switch (type) { + case N_LIST: + case N_ALT: + do { + r |= subexp_recursive_check(NCONS(node).left); + } while (IS_NOT_NULL(node = NCONS(node).right)); + break; + + case N_QUALIFIER: + r = subexp_recursive_check(NQUALIFIER(node).target); + break; + + case N_ANCHOR: + { + AnchorNode* an = &(NANCHOR(node)); + switch (an->type) { + case ANCHOR_PREC_READ: + case ANCHOR_PREC_READ_NOT: + case ANCHOR_LOOK_BEHIND: + case ANCHOR_LOOK_BEHIND_NOT: + r = subexp_recursive_check(an->target); + break; + } + } + break; + + case N_CALL: + r = 
subexp_recursive_check(NCALL(node).target); + if (r != 0) SET_CALL_RECURSION(node); + break; + + case N_EFFECT: + if (IS_EFFECT_MARK2(&(NEFFECT(node)))) + return 0; + else if (IS_EFFECT_MARK1(&(NEFFECT(node)))) + return 1; /* recursion */ + else { + SET_EFFECT_STATUS(node, NST_MARK2); + r = subexp_recursive_check(NEFFECT(node).target); + CLEAR_EFFECT_STATUS(node, NST_MARK2); + } + break; + + default: + break; + } + + return r; +} + + +static int +subexp_recursive_check_trav(Node* node, ScanEnv* env) +{ +#define FOUND_CALLED_NODE 1 + + int type; + int r = 0; + + type = NTYPE(node); + switch (type) { + case N_LIST: + case N_ALT: + { + int ret; + do { + ret = subexp_recursive_check_trav(NCONS(node).left, env); + if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE; + else if (ret < 0) return ret; + } while (IS_NOT_NULL(node = NCONS(node).right)); + } + break; + + case N_QUALIFIER: + r = subexp_recursive_check_trav(NQUALIFIER(node).target, env); + if (NQUALIFIER(node).upper == 0) { + if (r == FOUND_CALLED_NODE) + NQUALIFIER(node).is_refered = 1; + } + break; + + case N_ANCHOR: + { + AnchorNode* an = &(NANCHOR(node)); + switch (an->type) { + case ANCHOR_PREC_READ: + case ANCHOR_PREC_READ_NOT: + case ANCHOR_LOOK_BEHIND: + case ANCHOR_LOOK_BEHIND_NOT: + r = subexp_recursive_check_trav(an->target, env); + break; + } + } + break; + + case N_EFFECT: + { + EffectNode* en = &(NEFFECT(node)); + + if (! 
IS_EFFECT_RECURSION(en)) { + if (IS_EFFECT_CALLED(en)) { + SET_EFFECT_STATUS(node, NST_MARK1); + r = subexp_recursive_check(en->target); + if (r != 0) SET_EFFECT_STATUS(node, NST_RECURSION); + CLEAR_EFFECT_STATUS(node, NST_MARK1); + } + } + r = subexp_recursive_check_trav(en->target, env); + if (IS_EFFECT_CALLED(en)) + r |= FOUND_CALLED_NODE; + } + break; + + default: + break; + } + + return r; +} + +static int +setup_subexp_call(Node* node, ScanEnv* env) +{ + int type; + int r = 0; + + type = NTYPE(node); + switch (type) { + case N_LIST: + do { + r = setup_subexp_call(NCONS(node).left, env); + } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + break; + + case N_ALT: + do { + r = setup_subexp_call(NCONS(node).left, env); + } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + break; + + case N_QUALIFIER: + r = setup_subexp_call(NQUALIFIER(node).target, env); + break; + case N_EFFECT: + r = setup_subexp_call(NEFFECT(node).target, env); + break; + + case N_CALL: + { + int n, num, *refs; + UChar *p; + CallNode* cn = &(NCALL(node)); + Node** nodes = SCANENV_MEM_NODES(env); + +#ifdef USE_NAMED_GROUP + n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end, &refs); +#else + n = -1; +#endif + if (n <= 0) { + /* name not found, check group number. 
(?*ddd) */ + p = cn->name; + num = onig_scan_unsigned_number(&p, cn->name_end, env->enc); + if (num <= 0 || p != cn->name_end) { + onig_scan_env_set_error_string(env, + ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end); + return ONIGERR_UNDEFINED_NAME_REFERENCE; + } +#ifdef USE_NAMED_GROUP + if (env->num_named > 0 && + IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && + !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) { + return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; + } +#endif + if (num > env->num_mem) { + onig_scan_env_set_error_string(env, + ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end); + return ONIGERR_UNDEFINED_GROUP_REFERENCE; + } + cn->ref_num = num; + goto set_call_attr; + } + else if (n > 1) { + onig_scan_env_set_error_string(env, + ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end); + return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL; + } + else { + cn->ref_num = refs[0]; + set_call_attr: + cn->target = nodes[cn->ref_num]; + if (IS_NULL(cn->target)) { + onig_scan_env_set_error_string(env, + ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end); + return ONIGERR_UNDEFINED_NAME_REFERENCE; + } + SET_EFFECT_STATUS(cn->target, NST_CALLED); + BIT_STATUS_ON_AT(env->bt_mem_start, cn->ref_num); + cn->unset_addr_list = env->unset_addr_list; + } + } + break; + + case N_ANCHOR: + { + AnchorNode* an = &(NANCHOR(node)); + + switch (an->type) { + case ANCHOR_PREC_READ: + case ANCHOR_PREC_READ_NOT: + case ANCHOR_LOOK_BEHIND: + case ANCHOR_LOOK_BEHIND_NOT: + r = setup_subexp_call(an->target, env); + break; + } + } + break; + + default: + break; + } + + return r; +} +#endif + +/* divide different length alternatives in look-behind. 
+ (?<=A|B) ==> (?<=A)|(?<=B) + (?<!A|B) ==> (?<!A)(?<!B) +*/ +static int +divide_look_behind_alternatives(Node* node) +{ + Node tmp_node; + Node *head, *np, *insert_node; + AnchorNode* an = &(NANCHOR(node)); + int anc_type = an->type; + + head = an->target; + np = NCONS(head).left; + tmp_node = *node; *node = *head; *head = tmp_node; + NCONS(node).left = head; + NANCHOR(head).target = np; + + np = node; + while ((np = NCONS(np).right) != NULL_NODE) { + insert_node = onig_node_new_anchor(anc_type); + CHECK_NULL_RETURN_VAL(insert_node, ONIGERR_MEMORY); + NANCHOR(insert_node).target = NCONS(np).left; + NCONS(np).left = insert_node; + } + + if (anc_type == ANCHOR_LOOK_BEHIND_NOT) { + np = node; + do { + np->type = N_LIST; /* alt -> list */ + } while ((np = NCONS(np).right) != NULL_NODE); + } + return 0; +} + +static int +setup_look_behind(Node* node, regex_t* reg, ScanEnv* env) +{ + int r, len; + AnchorNode* an = &(NANCHOR(node)); + + r = get_char_length_tree(an->target, reg, &len); + if (r == 0) + an->char_len = len; + else if (r == GET_CHAR_LEN_VARLEN) + r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN; + else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) { + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND)) + r = divide_look_behind_alternatives(node); + else + r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN; + } + + return r; +} + +static int +next_setup(Node* node, Node* next_node, regex_t* reg) +{ + int type; + + retry: + type = NTYPE(node); + if (type == N_QUALIFIER) { + QualifierNode* qn = &(NQUALIFIER(node)); + if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) { +#ifdef USE_QUALIFIER_PEEK_NEXT + qn->next_head_exact = get_head_value_node(next_node, 1, reg); +#endif + /* automatic posseivation a*b ==> (?>a*)b */ + if (qn->lower <= 1) { + int ttype = NTYPE(qn->target); + if (IS_NODE_TYPE_SIMPLE(ttype)) { + Node *x, *y; + x = get_head_value_node(qn->target, 0, reg); + if (IS_NOT_NULL(x)) { + y = get_head_value_node(next_node, 0, reg); + if (IS_NOT_NULL(y) && 
is_not_included(x, y, reg)) { + Node* en = onig_node_new_effect(EFFECT_STOP_BACKTRACK); + CHECK_NULL_RETURN_VAL(en, ONIGERR_MEMORY); + SET_EFFECT_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT); + swap_node(node, en); + NEFFECT(node).target = en; + } + } + } + } + } + } + else if (type == N_EFFECT) { + EffectNode* en = &(NEFFECT(node)); + if (en->type == EFFECT_MEMORY) { + node = en->target; + goto retry; + } + } + return 0; +} + +static int +divide_ambig_string_node(Node* node, regex_t* reg) +{ + StrNode* sn = &NSTRING(node); + int ambig, prev_ambig; + UChar *prev, *p, *end, *prev_start, *start, *tmp, *wp; + Node *snode; + Node *root = NULL_NODE; + Node **tailp = (Node** )0; + + start = prev_start = p = sn->s; + end = sn->end; + if (p >= end) return 0; + + prev_ambig = ONIGENC_IS_MBC_AMBIGUOUS(reg->enc, reg->ambig_flag, &p, end); + + while (p < end) { + prev = p; + if (prev_ambig != (ambig = ONIGENC_IS_MBC_AMBIGUOUS(reg->enc, + reg->ambig_flag, &p, end))) { + + if (prev_ambig != 0) { + tmp = prev_start; + wp = prev_start; + while (tmp < prev) { + wp += ONIGENC_MBC_TO_NORMALIZE(reg->enc, reg->ambig_flag, + &tmp, end, wp); + } + snode = onig_node_new_str(prev_start, wp); + CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY); + NSTRING_SET_AMBIG(snode); + if (wp != prev) NSTRING_SET_AMBIG_REDUCE(snode); + } + else { + snode = onig_node_new_str(prev_start, prev); + CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY); + } + + if (tailp == (Node** )0) { + root = onig_node_new_list(snode, NULL); + CHECK_NULL_RETURN_VAL(root, ONIGERR_MEMORY); + tailp = &(NCONS(root).right); + } + else { + *tailp = onig_node_new_list(snode, NULL); + CHECK_NULL_RETURN_VAL(*tailp, ONIGERR_MEMORY); + tailp = &(NCONS(*tailp).right); + } + + prev_ambig = ambig; + prev_start = prev; + } + } + + if (prev_start == start) { + if (prev_ambig != 0) { + NSTRING_SET_AMBIG(node); + tmp = start; + wp = start; + while (tmp < end) { + wp += ONIGENC_MBC_TO_NORMALIZE(reg->enc, reg->ambig_flag, + &tmp, end, wp); + } + if (wp != 
sn->end) NSTRING_SET_AMBIG_REDUCE(node); + sn->end = wp; + } + } + else { + if (prev_ambig != 0) { + tmp = prev_start; + wp = prev_start; + while (tmp < end) { + wp += ONIGENC_MBC_TO_NORMALIZE(reg->enc, reg->ambig_flag, + &tmp, end, wp); + } + snode = onig_node_new_str(prev_start, wp); + CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY); + NSTRING_SET_AMBIG(snode); + if (wp != end) NSTRING_SET_AMBIG_REDUCE(snode); + } + else { + snode = onig_node_new_str(prev_start, end); + CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY); + } + + if (tailp == (Node** )0) { + root = onig_node_new_list(snode, NULL); + CHECK_NULL_RETURN_VAL(root, ONIGERR_MEMORY); + tailp = &(NCONS(node).right); + } + else { + *tailp = onig_node_new_list(snode, NULL); + CHECK_NULL_RETURN_VAL(*tailp, ONIGERR_MEMORY); + tailp = &(NCONS(*tailp).right); + } + + swap_node(node, root); + onig_node_str_clear(root); /* should be after swap! */ + onig_node_free(root); /* free original string node */ + } + + return 0; +} + +#define IN_ALT (1<<0) +#define IN_NOT (1<<1) +#define IN_REPEAT (1<<2) +#define IN_VAR_REPEAT (1<<3) + +/* setup_tree does the following work. + 1. check empty loop. (set qn->target_empty_info) + 2. expand ignore-case in char class. + 3. set memory status bit flags. (reg->mem_stats) + 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact]. + 5. find invalid patterns in look-behind. + 6. expand repeated string. 
+ */ +static int +setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) +{ + int type; + int r = 0; + + type = NTYPE(node); + switch (type) { + case N_LIST: + { + Node* prev = NULL_NODE; + do { + r = setup_tree(NCONS(node).left, reg, state, env); + if (IS_NOT_NULL(prev) && r == 0) { + r = next_setup(prev, NCONS(node).left, reg); + } + prev = NCONS(node).left; + } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + } + break; + + case N_ALT: + do { + r = setup_tree(NCONS(node).left, reg, (state | IN_ALT), env); + } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + break; + + case N_CCLASS: + break; + + case N_STRING: + if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) { + r = divide_ambig_string_node(node, reg); + } + break; + + case N_CTYPE: + case N_ANYCHAR: + break; + +#ifdef USE_SUBEXP_CALL + case N_CALL: + break; +#endif + + case N_BACKREF: + { + int i; + int* p; + Node** nodes = SCANENV_MEM_NODES(env); + BackrefNode* br = &(NBACKREF(node)); + p = BACKREFS_P(br); + for (i = 0; i < br->back_num; i++) { + if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; + BIT_STATUS_ON_AT(env->backrefed_mem, p[i]); + BIT_STATUS_ON_AT(env->bt_mem_start, p[i]); + SET_EFFECT_STATUS(nodes[p[i]], NST_MEM_BACKREFED); + } + } + break; + + case N_QUALIFIER: + { + OnigDistance d; + QualifierNode* qn = &(NQUALIFIER(node)); + Node* target = qn->target; + + if ((state & IN_REPEAT) != 0) { + qn->state |= NST_IN_REPEAT; + } + + if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) { + r = get_min_match_length(target, &d, env); + if (r) break; + if (d == 0) { + qn->target_empty_info = NQ_TARGET_IS_EMPTY; +#ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK + r = qualifiers_memory_node_info(target); + if (r < 0) break; + if (r > 0) { + qn->target_empty_info = r; + } +#endif +#if 0 + r = get_max_match_length(target, &d, env); + if (r == 0 && d == 0) { + /* ()* ==> ()?, ()+ ==> () */ + qn->upper = 1; + if (qn->lower > 1) qn->lower = 1; + if (NTYPE(target) 
== N_STRING) { + qn->upper = qn->lower = 0; /* /(?:)+/ ==> // */ + } + } +#endif + } + } + + state |= IN_REPEAT; + if (qn->lower != qn->upper) + state |= IN_VAR_REPEAT; + r = setup_tree(target, reg, state, env); + if (r) break; + + /* expand string */ +#define EXPAND_STRING_MAX_LENGTH 100 + if (NTYPE(target) == N_STRING) { + if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper && + qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) { + int len = NSTRING_LEN(target); + StrNode* sn = &(NSTRING(target)); + + if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) { + int i, n = qn->lower; + onig_node_conv_to_str_node(node, NSTRING(target).flag); + for (i = 0; i < n; i++) { + r = onig_node_str_cat(node, sn->s, sn->end); + if (r) break; + } + onig_node_free(target); + break; /* break case N_QUALIFIER: */ + } + } + } + +#ifdef USE_OP_PUSH_OR_JUMP_EXACT + if (qn->greedy && (qn->target_empty_info != 0)) { + if (NTYPE(target) == N_QUALIFIER) { + QualifierNode* tqn = &(NQUALIFIER(target)); + if (IS_NOT_NULL(tqn->head_exact)) { + qn->head_exact = tqn->head_exact; + tqn->head_exact = NULL; + } + } + else { + qn->head_exact = get_head_value_node(qn->target, 1, reg); + } + } +#endif + } + break; + + case N_EFFECT: + { + EffectNode* en = &(NEFFECT(node)); + + switch (en->type) { + case EFFECT_OPTION: + { + OnigOptionType options = reg->options; + reg->options = NEFFECT(node).option; + r = setup_tree(NEFFECT(node).target, reg, state, env); + reg->options = options; + } + break; + + case EFFECT_MEMORY: + if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) { + BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum); + /* SET_EFFECT_STATUS(node, NST_MEM_IN_ALT_NOT); */ + } + r = setup_tree(en->target, reg, state, env); + break; + + case EFFECT_STOP_BACKTRACK: + { + Node* target = en->target; + r = setup_tree(target, reg, state, env); + if (NTYPE(target) == N_QUALIFIER) { + QualifierNode* tqn = &(NQUALIFIER(target)); + if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 && + 
tqn->greedy != 0) { /* (?>a*), a*+ etc... */ + int qtype = NTYPE(tqn->target); + if (IS_NODE_TYPE_SIMPLE(qtype)) + SET_EFFECT_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT); + } + } + } + break; + } + } + break; + + case N_ANCHOR: + { + AnchorNode* an = &(NANCHOR(node)); + + switch (an->type) { + case ANCHOR_PREC_READ: + r = setup_tree(an->target, reg, state, env); + break; + case ANCHOR_PREC_READ_NOT: + r = setup_tree(an->target, reg, (state | IN_NOT), env); + break; + +/* allowed node types in look-behind */ +#define ALLOWED_TYPE_IN_LB \ + ( N_LIST | N_ALT | N_STRING | N_CCLASS | N_CTYPE | \ + N_ANYCHAR | N_ANCHOR | N_EFFECT | N_QUALIFIER | N_CALL ) + +#define ALLOWED_EFFECT_IN_LB ( EFFECT_MEMORY ) +#define ALLOWED_EFFECT_IN_LB_NOT 0 + +#define ALLOWED_ANCHOR_IN_LB \ +( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF ) +#define ALLOWED_ANCHOR_IN_LB_NOT \ +( ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF ) + /* can't allow all anchors, because \G in look-behind through Search(). + ex. /(?<=\G)zz/.match("azz") => success. 
*/ + + case ANCHOR_LOOK_BEHIND: + { + r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB, + ALLOWED_EFFECT_IN_LB, ALLOWED_ANCHOR_IN_LB); + if (r < 0) return r; + if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; + r = setup_look_behind(node, reg, env); + if (r != 0) return r; + r = setup_tree(an->target, reg, state, env); + } + break; + + case ANCHOR_LOOK_BEHIND_NOT: + { + r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB, + ALLOWED_EFFECT_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT); + if (r < 0) return r; + if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; + r = setup_look_behind(node, reg, env); + if (r != 0) return r; + r = setup_tree(an->target, reg, (state | IN_NOT), env); + } + break; + } + } + break; + + default: + break; + } + + return r; +} + +/* set skip map for Boyer-Moor search */ +static int +set_bm_skip(UChar* s, UChar* end, OnigEncoding enc, + UChar skip[], int** int_skip) +{ + int i, len; + + len = end - s; + if (len < ONIG_CHAR_TABLE_SIZE) { + for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = len; + + for (i = 0; i < len - 1; i++) + skip[s[i]] = len - 1 - i; + } + else { + if (IS_NULL(*int_skip)) { + *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE); + if (IS_NULL(*int_skip)) return ONIGERR_MEMORY; + } + for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = len; + + for (i = 0; i < len - 1; i++) + (*int_skip)[s[i]] = len - 1 - i; + } + return 0; +} + +#define OPT_EXACT_MAXLEN 24 + +typedef struct { + OnigDistance min; /* min byte length */ + OnigDistance max; /* max byte length */ +} MinMaxLen; + +typedef struct { + MinMaxLen mmd; + OnigEncoding enc; + OnigOptionType options; + OnigAmbigType ambig_flag; + ScanEnv* scan_env; +} OptEnv; + +typedef struct { + int left_anchor; + int right_anchor; +} OptAncInfo; + +typedef struct { + MinMaxLen mmd; /* info position */ + OptAncInfo anc; + + int reach_end; + int ignore_case; + int len; + UChar s[OPT_EXACT_MAXLEN]; +} OptExactInfo; + +typedef struct { + MinMaxLen mmd; /* info 
position */ + OptAncInfo anc; + + int value; /* weighted value */ + UChar map[ONIG_CHAR_TABLE_SIZE]; +} OptMapInfo; + +typedef struct { + MinMaxLen len; + + OptAncInfo anc; + OptExactInfo exb; /* boundary */ + OptExactInfo exm; /* middle */ + OptExactInfo expr; /* prec read (?=...) */ + + OptMapInfo map; /* boundary */ +} NodeOptInfo; + + +static int +map_position_value(OnigEncoding enc, int i) +{ + static short int ByteValTable[] = { + 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, + 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5, + 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1 + }; + + if (i < sizeof(ByteValTable)/sizeof(ByteValTable[0])) { + if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1) + return 20; + else + return (int )ByteValTable[i]; + } + else + return 4; /* Take it easy. 
*/ +} + +static int +distance_value(MinMaxLen* mm) +{ + /* 1000 / (min-max-dist + 1) */ + static short int dist_vals[] = { + 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100, + 91, 83, 77, 71, 67, 63, 59, 56, 53, 50, + 48, 45, 43, 42, 40, 38, 37, 36, 34, 33, + 32, 31, 30, 29, 29, 28, 27, 26, 26, 25, + 24, 24, 23, 23, 22, 22, 21, 21, 20, 20, + 20, 19, 19, 19, 18, 18, 18, 17, 17, 17, + 16, 16, 16, 16, 15, 15, 15, 15, 14, 14, + 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, + 12, 12, 12, 12, 12, 12, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 10, 10, 10, 10, 10 + }; + + int d; + + if (mm->max == ONIG_INFINITE_DISTANCE) return 0; + + d = mm->max - mm->min; + if (d < sizeof(dist_vals)/sizeof(dist_vals[0])) + /* return dist_vals[d] * 16 / (mm->min + 12); */ + return (int )dist_vals[d]; + else + return 1; +} + +static int +comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2) +{ + if (v2 <= 0) return -1; + if (v1 <= 0) return 1; + + v1 *= distance_value(d1); + v2 *= distance_value(d2); + + if (v2 > v1) return 1; + if (v2 < v1) return -1; + + if (d2->min < d1->min) return 1; + if (d2->min > d1->min) return -1; + return 0; +} + +static int +is_equal_mml(MinMaxLen* a, MinMaxLen* b) +{ + return (a->min == b->min && a->max == b->max) ? 
1 : 0; +} + + +static void +set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max) +{ + mml->min = min; + mml->max = max; +} + +static void +clear_mml(MinMaxLen* mml) +{ + mml->min = mml->max = 0; +} + +static void +copy_mml(MinMaxLen* to, MinMaxLen* from) +{ + to->min = from->min; + to->max = from->max; +} + +static void +add_mml(MinMaxLen* to, MinMaxLen* from) +{ + to->min = distance_add(to->min, from->min); + to->max = distance_add(to->max, from->max); +} + +#if 0 +static void +add_len_mml(MinMaxLen* to, OnigDistance len) +{ + to->min = distance_add(to->min, len); + to->max = distance_add(to->max, len); +} +#endif + +static void +alt_merge_mml(MinMaxLen* to, MinMaxLen* from) +{ + if (to->min > from->min) to->min = from->min; + if (to->max < from->max) to->max = from->max; +} + +static void +copy_opt_env(OptEnv* to, OptEnv* from) +{ + *to = *from; +} + +static void +clear_opt_anc_info(OptAncInfo* anc) +{ + anc->left_anchor = 0; + anc->right_anchor = 0; +} + +static void +copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from) +{ + *to = *from; +} + +static void +concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right, + OnigDistance left_len, OnigDistance right_len) +{ + clear_opt_anc_info(to); + + to->left_anchor = left->left_anchor; + if (left_len == 0) { + to->left_anchor |= right->left_anchor; + } + + to->right_anchor = right->right_anchor; + if (right_len == 0) { + to->right_anchor |= left->right_anchor; + } +} + +static int +is_left_anchor(int anc) +{ + if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF || + anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ || + anc == ANCHOR_PREC_READ_NOT) + return 0; + + return 1; +} + +static int +is_set_opt_anc_info(OptAncInfo* to, int anc) +{ + if ((to->left_anchor & anc) != 0) return 1; + + return ((to->right_anchor & anc) != 0 ? 
1 : 0); +} + +static void +add_opt_anc_info(OptAncInfo* to, int anc) +{ + if (is_left_anchor(anc)) + to->left_anchor |= anc; + else + to->right_anchor |= anc; +} + +static void +remove_opt_anc_info(OptAncInfo* to, int anc) +{ + if (is_left_anchor(anc)) + to->left_anchor &= ~anc; + else + to->right_anchor &= ~anc; +} + +static void +alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add) +{ + to->left_anchor &= add->left_anchor; + to->right_anchor &= add->right_anchor; +} + +static int +is_full_opt_exact_info(OptExactInfo* ex) +{ + return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0); +} + +static void +clear_opt_exact_info(OptExactInfo* ex) +{ + clear_mml(&ex->mmd); + clear_opt_anc_info(&ex->anc); + ex->reach_end = 0; + ex->ignore_case = 0; + ex->len = 0; + ex->s[0] = '\0'; +} + +static void +copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from) +{ + *to = *from; +} + +static void +concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add) +{ + int i, n; + OptAncInfo tanc; + + if (! to->ignore_case && add->ignore_case) { + if (to->len >= add->len) return ; /* avoid */ + + to->ignore_case = 1; + } + + for (i = to->len, n = 0; n < add->len && i < OPT_EXACT_MAXLEN; i++, n++) + to->s[i] = add->s[n]; + + to->len = i; + to->reach_end = (n == add->len ? add->reach_end : 0); + + concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1); + if (! 
to->reach_end) tanc.right_anchor = 0; + copy_opt_anc_info(&to->anc, &tanc); +} + +static void +concat_opt_exact_info_str(OptExactInfo* to, + UChar* s, UChar* end, int raw, OnigEncoding enc) +{ + int i, j, len; + UChar *p; + + for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) { + if (raw) { + to->s[i++] = *p++; + } + else { + len = enc_len(enc, p); + if (i + len > OPT_EXACT_MAXLEN) break; + for (j = 0; j < len; j++) + to->s[i++] = *p++; + } + } + + to->len = i; +} + +static void +alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env) +{ + int i, j, len; + + if (add->len == 0 || to->len == 0) { + clear_opt_exact_info(to); + return ; + } + + if (! is_equal_mml(&to->mmd, &add->mmd)) { + clear_opt_exact_info(to); + return ; + } + + for (i = 0; i < to->len && i < add->len; ) { + if (to->s[i] != add->s[i]) break; + len = enc_len(env->enc, to->s + i); + + for (j = 1; j < len; j++) { + if (to->s[i+j] != add->s[i+j]) break; + } + if (j < len) break; + i += len; + } + + if (! add->reach_end || i < add->len || i < to->len) { + to->reach_end = 0; + } + to->len = i; + to->ignore_case |= add->ignore_case; + + alt_merge_opt_anc_info(&to->anc, &add->anc); + if (! 
to->reach_end) to->anc.right_anchor = 0; +} + +static void +select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt) +{ + int v1, v2; + + v1 = now->len; + v2 = alt->len; + + if (v1 <= 2 && v2 <= 2) { + /* ByteValTable[x] is big value --> low price */ + v2 = map_position_value(enc, now->s[0]); + v1 = map_position_value(enc, alt->s[0]); + + if (now->len > 1) v1 += 5; + if (alt->len > 1) v2 += 5; + } + + if (now->ignore_case == 0) v1 *= 2; + if (alt->ignore_case == 0) v2 *= 2; + + if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0) + copy_opt_exact_info(now, alt); +} + +static void +clear_opt_map_info(OptMapInfo* map) +{ + static OptMapInfo clean_info = { + {0, 0}, {0, 0}, 0, + { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + } + }; + + xmemcpy(map, &clean_info, sizeof(OptMapInfo)); +} + +static void +copy_opt_map_info(OptMapInfo* to, OptMapInfo* from) +{ + *to = *from; +} + +static void +add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc) +{ + if (map->map[c] == 0) { + map->map[c] = 1; + map->value += map_position_value(enc, c); + } +} + +static int +add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end, + OnigEncoding enc, OnigAmbigType ambig_flag) +{ + int i, j, n, len; + 
UChar buf[ONIGENC_MBC_NORMALIZE_MAXLEN]; + OnigCodePoint code, ccode; + OnigCompAmbigCodes* ccs; + OnigPairAmbigCodes* pccs; + OnigAmbigType amb; + + add_char_opt_map_info(map, p[0], enc); + code = ONIGENC_MBC_TO_CODE(enc, p, end); + + for (amb = 0x01; amb <= ONIGENC_AMBIGUOUS_MATCH_LIMIT; amb <<= 1) { + if ((amb & ambig_flag) == 0) continue; + + n = ONIGENC_GET_ALL_PAIR_AMBIG_CODES(enc, amb, &pccs); + for (i = 0; i < n; i++) { + if (pccs[i].from == code) { + len = ONIGENC_CODE_TO_MBC(enc, pccs[i].to, buf); + if (len < 0) return len; + add_char_opt_map_info(map, buf[0], enc); + } + } + + if ((ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { + n = ONIGENC_GET_ALL_COMP_AMBIG_CODES(enc, amb, &ccs); + for (i = 0; i < n; i++) { + if (ccs[i].code == code) { + for (j = 0; j < ccs[i].n; j++) { + ccode = ccs[i].items[j].code[0]; + len = ONIGENC_CODE_TO_MBC(enc, ccode, buf); + if (len < 0) return len; + add_char_opt_map_info(map, buf[0], enc); + } + break; + } + } + } + } + return 0; +} + +static void +select_opt_map_info(OptMapInfo* now, OptMapInfo* alt) +{ + static int z = 1<<15; /* 32768: something big value */ + + int v1, v2; + + if (alt->value == 0) return ; + if (now->value == 0) { + copy_opt_map_info(now, alt); + return ; + } + + v1 = z / now->value; + v2 = z / alt->value; + if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0) + copy_opt_map_info(now, alt); +} + +static int +comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m) +{ +#define COMP_EM_BASE 20 + int ve, vm; + + if (m->value <= 0) return -1; + + ve = COMP_EM_BASE * e->len * (e->ignore_case ? 1 : 2); + vm = COMP_EM_BASE * 5 * 2 / m->value; + return comp_distance_value(&e->mmd, &m->mmd, ve, vm); +} + +static void +alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add) +{ + int i, val; + + /* if (! 
is_equal_mml(&to->mmd, &add->mmd)) return ; */ + if (to->value == 0) return ; + if (add->value == 0 || to->mmd.max < add->mmd.min) { + clear_opt_map_info(to); + return ; + } + + alt_merge_mml(&to->mmd, &add->mmd); + + val = 0; + for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) { + if (add->map[i]) + to->map[i] = 1; + + if (to->map[i]) + val += map_position_value(enc, i); + } + to->value = val; + + alt_merge_opt_anc_info(&to->anc, &add->anc); +} + +static void +set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd) +{ + copy_mml(&(opt->exb.mmd), mmd); + copy_mml(&(opt->expr.mmd), mmd); + copy_mml(&(opt->map.mmd), mmd); +} + +static void +clear_node_opt_info(NodeOptInfo* opt) +{ + clear_mml(&opt->len); + clear_opt_anc_info(&opt->anc); + clear_opt_exact_info(&opt->exb); + clear_opt_exact_info(&opt->exm); + clear_opt_exact_info(&opt->expr); + clear_opt_map_info(&opt->map); +} + +static void +copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from) +{ + *to = *from; +} + +static void +concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add) +{ + int exb_reach, exm_reach; + OptAncInfo tanc; + + concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max); + copy_opt_anc_info(&to->anc, &tanc); + + if (add->exb.len > 0 && to->len.max == 0) { + concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc, + to->len.max, add->len.max); + copy_opt_anc_info(&add->exb.anc, &tanc); + } + + if (add->map.value > 0 && to->len.max == 0) { + if (add->map.mmd.max == 0) + add->map.anc.left_anchor |= to->anc.left_anchor; + } + + exb_reach = to->exb.reach_end; + exm_reach = to->exm.reach_end; + + if (add->len.max != 0) + to->exb.reach_end = to->exm.reach_end = 0; + + if (add->exb.len > 0) { + if (exb_reach) { + concat_opt_exact_info(&to->exb, &add->exb); + clear_opt_exact_info(&add->exb); + } + else if (exm_reach) { + concat_opt_exact_info(&to->exm, &add->exb); + clear_opt_exact_info(&add->exb); + } + } + select_opt_exact_info(enc, &to->exm, &add->exb); + 
select_opt_exact_info(enc, &to->exm, &add->exm); + + if (to->expr.len > 0) { + if (add->len.max > 0) { + if (to->expr.len > (int )add->len.max) + to->expr.len = add->len.max; + + if (to->expr.mmd.max == 0) + select_opt_exact_info(enc, &to->exb, &to->expr); + else + select_opt_exact_info(enc, &to->exm, &to->expr); + } + } + else if (add->expr.len > 0) { + copy_opt_exact_info(&to->expr, &add->expr); + } + + select_opt_map_info(&to->map, &add->map); + + add_mml(&to->len, &add->len); +} + +static void +alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env) +{ + alt_merge_opt_anc_info (&to->anc, &add->anc); + alt_merge_opt_exact_info(&to->exb, &add->exb, env); + alt_merge_opt_exact_info(&to->exm, &add->exm, env); + alt_merge_opt_exact_info(&to->expr, &add->expr, env); + alt_merge_opt_map_info(env->enc, &to->map, &add->map); + + alt_merge_mml(&to->len, &add->len); +} + + +#define MAX_NODE_OPT_INFO_REF_COUNT 5 + +static int +optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) +{ + int type; + int r = 0; + + clear_node_opt_info(opt); + set_bound_node_opt_info(opt, &env->mmd); + + type = NTYPE(node); + switch (type) { + case N_LIST: + { + OptEnv nenv; + NodeOptInfo nopt; + Node* nd = node; + + copy_opt_env(&nenv, env); + do { + r = optimize_node_left(NCONS(nd).left, &nopt, &nenv); + if (r == 0) { + add_mml(&nenv.mmd, &nopt.len); + concat_left_node_opt_info(env->enc, opt, &nopt); + } + } while (r == 0 && IS_NOT_NULL(nd = NCONS(nd).right)); + } + break; + + case N_ALT: + { + NodeOptInfo nopt; + Node* nd = node; + + do { + r = optimize_node_left(NCONS(nd).left, &nopt, env); + if (r == 0) { + if (nd == node) copy_node_opt_info(opt, &nopt); + else alt_merge_node_opt_info(opt, &nopt, env); + } + } while ((r == 0) && IS_NOT_NULL(nd = NCONS(nd).right)); + } + break; + + case N_STRING: + { + StrNode* sn = &(NSTRING(node)); + int slen = sn->end - sn->s; + int is_raw = NSTRING_IS_RAW(node); + + if (! 
NSTRING_IS_AMBIG(node)) { + concat_opt_exact_info_str(&opt->exb, sn->s, sn->end, + NSTRING_IS_RAW(node), env->enc); + if (slen > 0) { + add_char_opt_map_info(&opt->map, *(sn->s), env->enc); + } + set_mml(&opt->len, slen, slen); + } + else { + int n, max; + + concat_opt_exact_info_str(&opt->exb, sn->s, sn->end, + is_raw, env->enc); + opt->exb.ignore_case = 1; + + if (slen > 0) { + r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end, + env->enc, env->ambig_flag); + if (r != 0) break; + } + + if (NSTRING_IS_AMBIG_REDUCE(node)) { + n = onigenc_strlen(env->enc, sn->s, sn->end); + max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n; + } + else { + max = slen; + } + set_mml(&opt->len, slen, max); + } + + if (opt->exb.len == slen) + opt->exb.reach_end = 1; + } + break; + + case N_CCLASS: + { + int i, z; + CClassNode* cc = &(NCCLASS(node)); + + /* no need to check ignore case. (setted in setup_tree()) */ + + if (IS_NOT_NULL(cc->mbuf) || IS_CCLASS_NOT(cc)) { + OnigDistance min = ONIGENC_MBC_MINLEN(env->enc); + OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc); + + set_mml(&opt->len, min, max); + } + else { + for (i = 0; i < SINGLE_BYTE_SIZE; i++) { + z = BITSET_AT(cc->bs, i); + if ((z && !IS_CCLASS_NOT(cc)) || (!z && IS_CCLASS_NOT(cc))) { + add_char_opt_map_info(&opt->map, (UChar )i, env->enc); + } + } + set_mml(&opt->len, 1, 1); + } + } + break; + + case N_CTYPE: + { + int i, min, max; + + max = ONIGENC_MBC_MAXLEN_DIST(env->enc); + + if (max == 1) { + min = 1; + + switch (NCTYPE(node).type) { + case CTYPE_NOT_WORD: + for (i = 0; i < SINGLE_BYTE_SIZE; i++) { + if (! 
ONIGENC_IS_CODE_WORD(env->enc, i)) { + add_char_opt_map_info(&opt->map, (UChar )i, env->enc); + } + } + break; + + case CTYPE_WORD: + for (i = 0; i < SINGLE_BYTE_SIZE; i++) { + if (ONIGENC_IS_CODE_WORD(env->enc, i)) { + add_char_opt_map_info(&opt->map, (UChar )i, env->enc); + } + } + break; + } + } + else { + min = ONIGENC_MBC_MINLEN(env->enc); + } + set_mml(&opt->len, min, max); + } + break; + + case N_ANYCHAR: + { + OnigDistance min = ONIGENC_MBC_MINLEN(env->enc); + OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc); + set_mml(&opt->len, min, max); + } + break; + + case N_ANCHOR: + switch (NANCHOR(node).type) { + case ANCHOR_BEGIN_BUF: + case ANCHOR_BEGIN_POSITION: + case ANCHOR_BEGIN_LINE: + case ANCHOR_END_BUF: + case ANCHOR_SEMI_END_BUF: + case ANCHOR_END_LINE: + add_opt_anc_info(&opt->anc, NANCHOR(node).type); + break; + + case ANCHOR_PREC_READ: + { + NodeOptInfo nopt; + + r = optimize_node_left(NANCHOR(node).target, &nopt, env); + if (r == 0) { + if (nopt.exb.len > 0) + copy_opt_exact_info(&opt->expr, &nopt.exb); + else if (nopt.exm.len > 0) + copy_opt_exact_info(&opt->expr, &nopt.exm); + + opt->expr.reach_end = 0; + + if (nopt.map.value > 0) + copy_opt_map_info(&opt->map, &nopt.map); + } + } + break; + + case ANCHOR_PREC_READ_NOT: + case ANCHOR_LOOK_BEHIND: /* Sorry, I can't make use of it. 
*/ + case ANCHOR_LOOK_BEHIND_NOT: + break; + } + break; + + case N_BACKREF: + { + int i; + int* backs; + OnigDistance min, max, tmin, tmax; + Node** nodes = SCANENV_MEM_NODES(env->scan_env); + BackrefNode* br = &(NBACKREF(node)); + + if (br->state & NST_RECURSION) { + set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE); + break; + } + backs = BACKREFS_P(br); + r = get_min_match_length(nodes[backs[0]], &min, env->scan_env); + if (r != 0) break; + r = get_max_match_length(nodes[backs[0]], &max, env->scan_env); + if (r != 0) break; + for (i = 1; i < br->back_num; i++) { + r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env); + if (r != 0) break; + r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env); + if (r != 0) break; + if (min > tmin) min = tmin; + if (max < tmax) max = tmax; + } + if (r == 0) set_mml(&opt->len, min, max); + } + break; + +#ifdef USE_SUBEXP_CALL + case N_CALL: + if (IS_CALL_RECURSION(&(NCALL(node)))) + set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE); + else { + OnigOptionType save = env->options; + env->options = NEFFECT(NCALL(node).target).option; + r = optimize_node_left(NCALL(node).target, opt, env); + env->options = save; + } + break; +#endif + + case N_QUALIFIER: + { + int i; + OnigDistance min, max; + NodeOptInfo nopt; + QualifierNode* qn = &(NQUALIFIER(node)); + + r = optimize_node_left(qn->target, &nopt, env); + if (r) break; + + if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) { + if (env->mmd.max == 0 && + NTYPE(qn->target) == N_ANYCHAR && qn->greedy) { + if (IS_POSIXLINE(env->options)) + add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_PL); + else + add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR); + } + } + else { + if (qn->lower > 0) { + copy_node_opt_info(opt, &nopt); + if (nopt.exb.len > 0) { + if (nopt.exb.reach_end) { + for (i = 2; i < qn->lower && + ! 
is_full_opt_exact_info(&opt->exb); i++) { + concat_opt_exact_info(&opt->exb, &nopt.exb); + } + if (i < qn->lower) { + opt->exb.reach_end = 0; + } + } + } + + if (qn->lower != qn->upper) { + opt->exb.reach_end = 0; + opt->exm.reach_end = 0; + } + if (qn->lower > 1) + opt->exm.reach_end = 0; + } + } + + min = distance_multiply(nopt.len.min, qn->lower); + if (IS_REPEAT_INFINITE(qn->upper)) + max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0); + else + max = distance_multiply(nopt.len.max, qn->upper); + + set_mml(&opt->len, min, max); + } + break; + + case N_EFFECT: + { + EffectNode* en = &(NEFFECT(node)); + + switch (en->type) { + case EFFECT_OPTION: + { + OnigOptionType save = env->options; + + env->options = en->option; + r = optimize_node_left(en->target, opt, env); + env->options = save; + } + break; + + case EFFECT_MEMORY: +#ifdef USE_SUBEXP_CALL + en->opt_count++; + if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) { + OnigDistance min, max; + + min = 0; + max = ONIG_INFINITE_DISTANCE; + if (IS_EFFECT_MIN_FIXED(en)) min = en->min_len; + if (IS_EFFECT_MAX_FIXED(en)) max = en->max_len; + set_mml(&opt->len, min, max); + } + else +#endif + { + r = optimize_node_left(en->target, opt, env); + + if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) { + if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum)) + remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK); + } + } + break; + + case EFFECT_STOP_BACKTRACK: + r = optimize_node_left(en->target, opt, env); + break; + } + } + break; + + default: +#ifdef ONIG_DEBUG + fprintf(stderr, "optimize_node_left: undefined node type %d\n", + NTYPE(node)); +#endif + r = ONIGERR_TYPE_BUG; + break; + } + + return r; +} + +static int +set_optimize_exact_info(regex_t* reg, OptExactInfo* e) +{ + int r; + + if (e->len == 0) return 0; + + if (e->ignore_case) { + reg->exact = (UChar* )xmalloc(e->len); + CHECK_NULL_RETURN_VAL(reg->exact, ONIGERR_MEMORY); + xmemcpy(reg->exact, e->s, e->len); + reg->exact_end = reg->exact 
+ e->len; + reg->optimize = ONIG_OPTIMIZE_EXACT_IC; + } + else { + int allow_reverse; + + reg->exact = k_strdup(e->s, e->s + e->len); + CHECK_NULL_RETURN_VAL(reg->exact, ONIGERR_MEMORY); + reg->exact_end = reg->exact + e->len; + + if (e->anc.left_anchor & ANCHOR_BEGIN_LINE) + allow_reverse = 1; + else + allow_reverse = + ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end); + + if (e->len >= 3 || (e->len >= 2 && allow_reverse)) { + r = set_bm_skip(reg->exact, reg->exact_end, reg->enc, + reg->map, &(reg->int_map)); + if (r) return r; + + reg->optimize = (allow_reverse != 0 + ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV); + } + else { + reg->optimize = ONIG_OPTIMIZE_EXACT; + } + } + + reg->dmin = e->mmd.min; + reg->dmax = e->mmd.max; + + if (reg->dmin != ONIG_INFINITE_DISTANCE) { + reg->threshold_len = reg->dmin + (reg->exact_end - reg->exact); + } + + return 0; +} + +static void +set_optimize_map_info(regex_t* reg, OptMapInfo* m) +{ + int i; + + for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) + reg->map[i] = m->map[i]; + + reg->optimize = ONIG_OPTIMIZE_MAP; + reg->dmin = m->mmd.min; + reg->dmax = m->mmd.max; + + if (reg->dmin != ONIG_INFINITE_DISTANCE) { + reg->threshold_len = reg->dmin + 1; + } +} + +static void +set_sub_anchor(regex_t* reg, OptAncInfo* anc) +{ + reg->sub_anchor |= anc->left_anchor & ANCHOR_BEGIN_LINE; + reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE; +} + +#ifdef ONIG_DEBUG +static void print_optimize_info(FILE* f, regex_t* reg); +#endif + +static int +set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env) +{ + + int r; + NodeOptInfo opt; + OptEnv env; + + env.enc = reg->enc; + env.options = reg->options; + env.ambig_flag = reg->ambig_flag; + env.scan_env = scan_env; + clear_mml(&env.mmd); + + r = optimize_node_left(node, &opt, &env); + if (r) return r; + + reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF | + ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_PL); + + 
reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF); + + if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) { + reg->anchor_dmin = opt.len.min; + reg->anchor_dmax = opt.len.max; + } + + if (opt.exb.len > 0 || opt.exm.len > 0) { + select_opt_exact_info(reg->enc, &opt.exb, &opt.exm); + if (opt.map.value > 0 && + comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) { + goto set_map; + } + else { + r = set_optimize_exact_info(reg, &opt.exb); + set_sub_anchor(reg, &opt.exb.anc); + } + } + else if (opt.map.value > 0) { + set_map: + set_optimize_map_info(reg, &opt.map); + set_sub_anchor(reg, &opt.map.anc); + } + else { + reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE; + if (opt.len.max == 0) + reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE; + } + +#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH) + print_optimize_info(stderr, reg); +#endif + return r; +} + +static void +clear_optimize_info(regex_t* reg) +{ + reg->optimize = ONIG_OPTIMIZE_NONE; + reg->anchor = 0; + reg->anchor_dmin = 0; + reg->anchor_dmax = 0; + reg->sub_anchor = 0; + reg->exact_end = (UChar* )NULL; + reg->threshold_len = 0; + if (IS_NOT_NULL(reg->exact)) { + xfree(reg->exact); + reg->exact = (UChar* )NULL; + } +} + +#ifdef ONIG_DEBUG + +static void +print_distance_range(FILE* f, OnigDistance a, OnigDistance b) +{ + if (a == ONIG_INFINITE_DISTANCE) + fputs("inf", f); + else + fprintf(f, "(%u)", a); + + fputs("-", f); + + if (b == ONIG_INFINITE_DISTANCE) + fputs("inf", f); + else + fprintf(f, "(%u)", b); +} + +static void +print_anchor(FILE* f, int anchor) +{ + int q = 0; + + fprintf(f, "["); + + if (anchor & ANCHOR_BEGIN_BUF) { + fprintf(f, "begin-buf"); + q = 1; + } + if (anchor & ANCHOR_BEGIN_LINE) { + if (q) fprintf(f, ", "); + q = 1; + fprintf(f, "begin-line"); + } + if (anchor & ANCHOR_BEGIN_POSITION) { + if (q) fprintf(f, ", "); + q = 1; + fprintf(f, "begin-pos"); + } + if (anchor & ANCHOR_END_BUF) { + if (q) fprintf(f, ", "); + q = 1; 
+ fprintf(f, "end-buf"); + } + if (anchor & ANCHOR_SEMI_END_BUF) { + if (q) fprintf(f, ", "); + q = 1; + fprintf(f, "semi-end-buf"); + } + if (anchor & ANCHOR_END_LINE) { + if (q) fprintf(f, ", "); + q = 1; + fprintf(f, "end-line"); + } + if (anchor & ANCHOR_ANYCHAR_STAR) { + if (q) fprintf(f, ", "); + q = 1; + fprintf(f, "anychar-star"); + } + if (anchor & ANCHOR_ANYCHAR_STAR_PL) { + if (q) fprintf(f, ", "); + fprintf(f, "anychar-star-pl"); + } + + fprintf(f, "]"); +} + +static void +print_optimize_info(FILE* f, regex_t* reg) +{ + static char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV", + "EXACT_IC", "MAP" }; + + fprintf(f, "optimize: %s\n", on[reg->optimize]); + fprintf(f, " anchor: "); print_anchor(f, reg->anchor); + if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0) + print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax); + fprintf(f, "\n"); + + if (reg->optimize) { + fprintf(f, " sub anchor: "); print_anchor(f, reg->sub_anchor); + fprintf(f, "\n"); + } + fprintf(f, "\n"); + + if (reg->exact) { + UChar *p; + fprintf(f, "exact: ["); + for (p = reg->exact; p < reg->exact_end; p++) { + fputc(*p, f); + } + fprintf(f, "]: length: %d\n", (reg->exact_end - reg->exact)); + } + else if (reg->optimize & ONIG_OPTIMIZE_MAP) { + int c, i, n = 0; + + for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) + if (reg->map[i]) n++; + + fprintf(f, "map: n=%d\n", n); + if (n > 0) { + c = 0; + fputc('[', f); + for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) { + if (reg->map[i] != 0) { + if (c > 0) fputs(", ", f); + c++; + if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 && + ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i)) + fputc(i, f); + else + fprintf(f, "%d", i); + } + } + fprintf(f, "]\n"); + } + } +} +#endif /* ONIG_DEBUG */ + + +static void +onig_free_body(regex_t* reg) +{ + if (IS_NOT_NULL(reg->p)) xfree(reg->p); + if (IS_NOT_NULL(reg->exact)) xfree(reg->exact); + if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map); + if (IS_NOT_NULL(reg->int_map_backward)) 
xfree(reg->int_map_backward); + if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range); + if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain); + +#ifdef USE_NAMED_GROUP + onig_names_free(reg); +#endif +} + +extern void +onig_free(regex_t* reg) +{ + if (IS_NOT_NULL(reg)) { + onig_free_body(reg); + xfree(reg); + } +} + +#define REGEX_TRANSFER(to,from) do {\ + (to)->state = ONIG_STATE_MODIFY;\ + onig_free_body(to);\ + xmemcpy(to, from, sizeof(regex_t));\ + xfree(from);\ +} while (0) + +extern void +onig_transfer(regex_t* to, regex_t* from) +{ + THREAD_ATOMIC_START; + REGEX_TRANSFER(to, from); + THREAD_ATOMIC_END; +} + +#define REGEX_CHAIN_HEAD(reg) do {\ + while (IS_NOT_NULL((reg)->chain)) {\ + (reg) = (reg)->chain;\ + }\ +} while (0) + +extern void +onig_chain_link_add(regex_t* to, regex_t* add) +{ + THREAD_ATOMIC_START; + REGEX_CHAIN_HEAD(to); + to->chain = add; + THREAD_ATOMIC_END; +} + +extern void +onig_chain_reduce(regex_t* reg) +{ + regex_t *head, *prev; + + THREAD_ATOMIC_START; + prev = reg; + head = prev->chain; + if (IS_NOT_NULL(head)) { + reg->state = ONIG_STATE_MODIFY; + while (IS_NOT_NULL(head->chain)) { + prev = head; + head = head->chain; + } + prev->chain = (regex_t* )NULL; + REGEX_TRANSFER(reg, head); + } + THREAD_ATOMIC_END; +} + +#if 0 +extern int +onig_clone(regex_t** to, regex_t* from) +{ + int r, size; + regex_t* reg; + +#ifdef USE_MULTI_THREAD_SYSTEM + if (ONIG_STATE(from) >= ONIG_STATE_NORMAL) { + ONIG_STATE_INC(from); + if (IS_NOT_NULL(from->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) { + onig_chain_reduce(from); + ONIG_STATE_INC(from); + } + } + else { + int n = 0; + while (ONIG_STATE(from) < ONIG_STATE_NORMAL) { + if (++n > THREAD_PASS_LIMIT_COUNT) + return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT; + THREAD_PASS; + } + ONIG_STATE_INC(from); + } +#endif /* USE_MULTI_THREAD_SYSTEM */ + + r = onig_alloc_init(®, ONIG_OPTION_NONE, ONIGENC_AMBIGUOUS_MATCH_DEFAULT, + from->enc, ONIG_SYNTAX_DEFAULT); + if (r != 0) { + ONIG_STATE_DEC(from); + 
return r; + } + + xmemcpy(reg, from, sizeof(onig_t)); + reg->chain = (regex_t* )NULL; + reg->state = ONIG_STATE_NORMAL; + + if (from->p) { + reg->p = (UChar* )xmalloc(reg->alloc); + if (IS_NULL(reg->p)) goto mem_error; + xmemcpy(reg->p, from->p, reg->alloc); + } + + if (from->exact) { + reg->exact = (UChar* )xmalloc(from->exact_end - from->exact); + if (IS_NULL(reg->exact)) goto mem_error; + reg->exact_end = reg->exact + (from->exact_end - from->exact); + xmemcpy(reg->exact, from->exact, reg->exact_end - reg->exact); + } + + if (from->int_map) { + size = sizeof(int) * ONIG_CHAR_TABLE_SIZE; + reg->int_map = (int* )xmalloc(size); + if (IS_NULL(reg->int_map)) goto mem_error; + xmemcpy(reg->int_map, from->int_map, size); + } + + if (from->int_map_backward) { + size = sizeof(int) * ONIG_CHAR_TABLE_SIZE; + reg->int_map_backward = (int* )xmalloc(size); + if (IS_NULL(reg->int_map_backward)) goto mem_error; + xmemcpy(reg->int_map_backward, from->int_map_backward, size); + } + +#ifdef USE_NAMED_GROUP + reg->name_table = names_clone(from); /* names_clone is not implemented */ +#endif + + ONIG_STATE_DEC(from); + *to = reg; + return 0; + + mem_error: + ONIG_STATE_DEC(from); + return ONIGERR_MEMORY; +} +#endif + +#ifdef ONIG_DEBUG +static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg)); +#endif +#ifdef ONIG_DEBUG_PARSE_TREE +static void print_tree P_((FILE* f, Node* node)); +#endif + +extern int +onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, + OnigErrorInfo* einfo) +{ +#define COMPILE_INIT_SIZE 20 + + int r, init_size; + Node* root; + ScanEnv scan_env; +#ifdef USE_SUBEXP_CALL + UnsetAddrList uslist; +#endif + + reg->state = ONIG_STATE_COMPILING; + + if (reg->alloc == 0) { + init_size = (pattern_end - pattern) * 2; + if (init_size <= 0) init_size = COMPILE_INIT_SIZE; + r = BBUF_INIT(reg, init_size); + if (r != 0) goto end; + } + else + reg->used = 0; + + reg->num_mem = 0; + reg->num_repeat = 0; + reg->num_null_check = 0; + 
reg->repeat_range_alloc = 0; + reg->repeat_range = (OnigRepeatRange* )NULL; + + r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env); + if (r != 0) goto err; + +#ifdef USE_NAMED_GROUP + /* mixed use named group and no-named group */ + if (scan_env.num_named > 0 && + IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && + !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) { + if (scan_env.num_named != scan_env.num_mem) + r = disable_noname_group_capture(&root, reg, &scan_env); + else + r = numbered_ref_check(root); + + if (r != 0) goto err; + } +#endif + +#ifdef ONIG_DEBUG_PARSE_TREE + print_tree(stderr, root); +#endif + +#ifdef USE_SUBEXP_CALL + if (scan_env.num_call > 0) { + r = unset_addr_list_init(&uslist, scan_env.num_call); + if (r != 0) goto err; + scan_env.unset_addr_list = &uslist; + r = setup_subexp_call(root, &scan_env); + if (r != 0) goto err_unset; + r = subexp_recursive_check_trav(root, &scan_env); + if (r < 0) goto err_unset; + r = subexp_inf_recursive_check_trav(root, &scan_env); + if (r != 0) goto err_unset; + + reg->num_call = scan_env.num_call; + } + else + reg->num_call = 0; +#endif + + r = setup_tree(root, reg, 0, &scan_env); + if (r != 0) goto err_unset; + + reg->capture_history = scan_env.capture_history; + reg->bt_mem_start = scan_env.bt_mem_start; + reg->bt_mem_start |= reg->capture_history; + if (IS_FIND_CONDITION(reg->options)) + BIT_STATUS_ON_ALL(reg->bt_mem_end); + else { + reg->bt_mem_end = scan_env.bt_mem_end; + reg->bt_mem_end |= reg->capture_history; + } + + clear_optimize_info(reg); +#ifndef ONIG_DONT_OPTIMIZE + r = set_optimize_info_from_tree(root, reg, &scan_env); + if (r != 0) goto err_unset; +#endif + + if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) { + xfree(scan_env.mem_nodes_dynamic); + scan_env.mem_nodes_dynamic = (Node** )NULL; + } + + r = compile_tree(root, reg); + if (r == 0) { + r = add_opcode(reg, OP_END); +#ifdef USE_SUBEXP_CALL + if (scan_env.num_call > 0) { + r = 
unset_addr_list_fix(&uslist, reg); + unset_addr_list_end(&uslist); + if (r) goto err; + } +#endif + + if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0)) + reg->stack_pop_level = STACK_POP_LEVEL_ALL; + else { + if (reg->bt_mem_start != 0) + reg->stack_pop_level = STACK_POP_LEVEL_MEM_START; + else + reg->stack_pop_level = STACK_POP_LEVEL_FREE; + } + } +#ifdef USE_SUBEXP_CALL + else if (scan_env.num_call > 0) { + unset_addr_list_end(&uslist); + } +#endif + onig_node_free(root); + +#ifdef ONIG_DEBUG_COMPILE +#ifdef USE_NAMED_GROUP + onig_print_names(stderr, reg); +#endif + print_compiled_byte_code_list(stderr, reg); +#endif + + end: + reg->state = ONIG_STATE_NORMAL; + return r; + + err_unset: +#ifdef USE_SUBEXP_CALL + if (scan_env.num_call > 0) { + unset_addr_list_end(&uslist); + } +#endif + err: + if (IS_NOT_NULL(scan_env.error)) { + if (IS_NOT_NULL(einfo)) { + einfo->par = scan_env.error; + einfo->par_end = scan_env.error_end; + } + } + + if (IS_NOT_NULL(root)) onig_node_free(root); + if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) + xfree(scan_env.mem_nodes_dynamic); + return r; +} + +extern int +onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, + OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, + OnigErrorInfo* einfo) +{ + int r; + regex_t *new_reg; + + r = onig_new(&new_reg, pattern, pattern_end, option, enc, syntax, einfo); + if (r) return r; + if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) { + onig_transfer(reg, new_reg); + } + else { + onig_chain_link_add(reg, new_reg); + } + return 0; +} + +static int onig_inited = 0; + +extern int +onig_alloc_init(regex_t** reg, OnigOptionType option, OnigAmbigType ambig_flag, + OnigEncoding enc, OnigSyntaxType* syntax) +{ + if (! 
onig_inited) + onig_init(); + + if (ONIGENC_IS_UNDEF(enc)) + return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED; + + *reg = (regex_t* )xmalloc(sizeof(regex_t)); + if (IS_NULL(*reg)) return ONIGERR_MEMORY; + (*reg)->state = ONIG_STATE_MODIFY; + + if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) { + option |= syntax->options; + option &= ~ONIG_OPTION_SINGLELINE; + } + else + option |= syntax->options; + + (*reg)->enc = enc; + (*reg)->options = option; + (*reg)->syntax = syntax; + (*reg)->optimize = 0; + (*reg)->exact = (UChar* )NULL; + (*reg)->int_map = (int* )NULL; + (*reg)->int_map_backward = (int* )NULL; + (*reg)->chain = (regex_t* )NULL; + + (*reg)->p = (UChar* )NULL; + (*reg)->alloc = 0; + (*reg)->used = 0; + (*reg)->name_table = (void* )NULL; + + (*reg)->ambig_flag = ambig_flag; + (*reg)->ambig_flag &= ONIGENC_SUPPORT_AMBIG_FLAG(enc); + + return 0; +} + +extern int +onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end, + OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, + OnigErrorInfo* einfo) +{ + int r; + + if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL; + + r = onig_alloc_init(reg, option, ONIGENC_AMBIGUOUS_MATCH_DEFAULT, + enc, syntax); + if (r) return r; + + r = onig_compile(*reg, pattern, pattern_end, einfo); + if (r) { + onig_free(*reg); + *reg = NULL; + } + return r; +} + +extern int +onig_init() +{ + if (onig_inited != 0) + return 0; + + onig_inited = 1; + + THREAD_ATOMIC_START; + + onigenc_init(); + onigenc_set_default_caseconv_table((UChar* )0); + +#ifdef ONIG_DEBUG_STATISTICS + onig_statistics_init(); +#endif + + THREAD_ATOMIC_END; + return 0; +} + + +extern int +onig_end() +{ + extern int onig_free_shared_cclass_table(); + + THREAD_ATOMIC_START; + +#ifdef ONIG_DEBUG_STATISTICS + onig_print_statistics(stderr); +#endif + +#ifdef USE_RECYCLE_NODE + onig_free_node_list(); +#endif + +#ifdef USE_SHARED_CCLASS_TABLE + onig_free_shared_cclass_table(); +#endif + + onig_inited = 0; + + THREAD_ATOMIC_END; + return 0; +} + + 
+#ifdef ONIG_DEBUG + +OnigOpInfoType OnigOpInfo[] = { + { OP_FINISH, "finish", ARG_NON }, + { OP_END, "end", ARG_NON }, + { OP_EXACT1, "exact1", ARG_SPECIAL }, + { OP_EXACT2, "exact2", ARG_SPECIAL }, + { OP_EXACT3, "exact3", ARG_SPECIAL }, + { OP_EXACT4, "exact4", ARG_SPECIAL }, + { OP_EXACT5, "exact5", ARG_SPECIAL }, + { OP_EXACTN, "exactn", ARG_SPECIAL }, + { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL }, + { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL }, + { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL }, + { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL }, + { OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL }, + { OP_EXACTMBN, "exactmbn", ARG_SPECIAL }, + { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL }, + { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL }, + { OP_CCLASS, "cclass", ARG_SPECIAL }, + { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL }, + { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL }, + { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL }, + { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL }, + { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL }, + { OP_CCLASS_NODE, "cclass-node", ARG_SPECIAL }, + { OP_ANYCHAR, "anychar", ARG_NON }, + { OP_ANYCHAR_ML, "anychar-ml", ARG_NON }, + { OP_ANYCHAR_STAR, "anychar*", ARG_NON }, + { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON }, + { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL }, + { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL }, + { OP_WORD, "word", ARG_NON }, + { OP_NOT_WORD, "not-word", ARG_NON }, + { OP_WORD_SB, "word-sb", ARG_NON }, + { OP_WORD_MB, "word-mb", ARG_NON }, + { OP_WORD_BOUND, "word-bound", ARG_NON }, + { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON }, + { OP_WORD_BEGIN, "word-begin", ARG_NON }, + { OP_WORD_END, "word-end", ARG_NON }, + { OP_BEGIN_BUF, "begin-buf", ARG_NON }, + { OP_END_BUF, "end-buf", ARG_NON }, + { OP_BEGIN_LINE, "begin-line", ARG_NON }, + { OP_END_LINE, "end-line", ARG_NON }, + { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON }, + { OP_BEGIN_POSITION, "begin-position", ARG_NON }, + { 
OP_BACKREF1, "backref1", ARG_NON }, + { OP_BACKREF2, "backref2", ARG_NON }, + { OP_BACKREF3, "backref3", ARG_NON }, + { OP_BACKREFN, "backrefn", ARG_MEMNUM }, + { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL }, + { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL }, + { OP_BACKREF_MULTI_IC, "backref_multi-ic",ARG_SPECIAL }, + { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM }, + { OP_MEMORY_START, "mem-start", ARG_MEMNUM }, + { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM }, + { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM }, + { OP_MEMORY_END, "mem-end", ARG_MEMNUM }, + { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM }, + { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION }, + { OP_SET_OPTION, "set-option", ARG_OPTION }, + { OP_FAIL, "fail", ARG_NON }, + { OP_JUMP, "jump", ARG_RELADDR }, + { OP_PUSH, "push", ARG_RELADDR }, + { OP_POP, "pop", ARG_NON }, + { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL }, + { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL }, + { OP_REPEAT, "repeat", ARG_SPECIAL }, + { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL }, + { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM }, + { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM }, + { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM }, + { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM }, + { OP_NULL_CHECK_START, "null-check-start",ARG_MEMNUM }, + { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM }, + { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM }, + { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM }, + { OP_PUSH_POS, "push-pos", ARG_NON }, + { OP_POP_POS, "pop-pos", ARG_NON }, + { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR }, + { OP_FAIL_POS, "fail-pos", ARG_NON }, + { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON }, + { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON }, + { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL }, + { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL }, + { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON 
}, + { OP_CALL, "call", ARG_ABSADDR }, + { OP_RETURN, "return", ARG_NON }, + { -1, "", ARG_NON } +}; + +static char* +op2name(int opcode) +{ + int i; + + for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { + if (opcode == OnigOpInfo[i].opcode) + return OnigOpInfo[i].name; + } + return ""; +} + +static int +op2arg_type(int opcode) +{ + int i; + + for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { + if (opcode == OnigOpInfo[i].opcode) + return OnigOpInfo[i].arg_type; + } + return ARG_SPECIAL; +} + +static void +Indent(FILE* f, int indent) +{ + int i; + for (i = 0; i < indent; i++) putc(' ', f); +} + +static void +p_string(FILE* f, int len, UChar* s) +{ + fputs(":", f); + while (len-- > 0) { fputc(*s++, f); } +} + +static void +p_len_string(FILE* f, LengthType len, int mb_len, UChar* s) +{ + int x = len * mb_len; + + fprintf(f, ":%d:", len); + while (x-- > 0) { fputc(*s++, f); } +} + +extern void +onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, + OnigEncoding enc) +{ + int i, n, arg_type; + RelAddrType addr; + LengthType len; + MemNumType mem; + OnigCodePoint code; + UChar *q; + + fprintf(f, "[%s", op2name(*bp)); + arg_type = op2arg_type(*bp); + if (arg_type != ARG_SPECIAL) { + bp++; + switch (arg_type) { + case ARG_NON: + break; + case ARG_RELADDR: + GET_RELADDR_INC(addr, bp); + fprintf(f, ":(%d)", addr); + break; + case ARG_ABSADDR: + GET_ABSADDR_INC(addr, bp); + fprintf(f, ":(%d)", addr); + break; + case ARG_LENGTH: + GET_LENGTH_INC(len, bp); + fprintf(f, ":%d", len); + break; + case ARG_MEMNUM: + mem = *((MemNumType* )bp); + bp += SIZE_MEMNUM; + fprintf(f, ":%d", mem); + break; + case ARG_OPTION: + { + OnigOptionType option = *((OnigOptionType* )bp); + bp += SIZE_OPTION; + fprintf(f, ":%d", option); + } + break; + } + } + else { + switch (*bp++) { + case OP_EXACT1: + case OP_ANYCHAR_STAR_PEEK_NEXT: + case OP_ANYCHAR_ML_STAR_PEEK_NEXT: + p_string(f, 1, bp++); break; + case OP_EXACT2: + p_string(f, 2, bp); bp += 2; break; + case OP_EXACT3: + p_string(f, 3, 
bp); bp += 3; break; + case OP_EXACT4: + p_string(f, 4, bp); bp += 4; break; + case OP_EXACT5: + p_string(f, 5, bp); bp += 5; break; + case OP_EXACTN: + GET_LENGTH_INC(len, bp); + p_len_string(f, len, 1, bp); + bp += len; + break; + + case OP_EXACTMB2N1: + p_string(f, 2, bp); bp += 2; break; + case OP_EXACTMB2N2: + p_string(f, 4, bp); bp += 4; break; + case OP_EXACTMB2N3: + p_string(f, 6, bp); bp += 6; break; + case OP_EXACTMB2N: + GET_LENGTH_INC(len, bp); + p_len_string(f, len, 2, bp); + bp += len * 2; + break; + case OP_EXACTMB3N: + GET_LENGTH_INC(len, bp); + p_len_string(f, len, 3, bp); + bp += len * 3; + break; + case OP_EXACTMBN: + { + int mb_len; + + GET_LENGTH_INC(mb_len, bp); + GET_LENGTH_INC(len, bp); + fprintf(f, ":%d:%d:", mb_len, len); + n = len * mb_len; + while (n-- > 0) { fputc(*bp++, f); } + } + break; + + case OP_EXACT1_IC: + len = enc_len(enc, bp); + p_string(f, len, bp); + bp += len; + break; + case OP_EXACTN_IC: + GET_LENGTH_INC(len, bp); + p_len_string(f, len, 1, bp); + bp += len; + break; + + case OP_CCLASS: + n = bitset_on_num((BitSetRef )bp); + bp += SIZE_BITSET; + fprintf(f, ":%d", n); + break; + + case OP_CCLASS_NOT: + n = bitset_on_num((BitSetRef )bp); + bp += SIZE_BITSET; + fprintf(f, ":%d", n); + break; + + case OP_CCLASS_MB: + case OP_CCLASS_MB_NOT: + GET_LENGTH_INC(len, bp); + q = bp; +#ifndef PLATFORM_UNALIGNED_WORD_ACCESS + ALIGNMENT_RIGHT(q); +#endif + GET_CODE_POINT(code, q); + bp += len; + fprintf(f, ":%d:%d", (int )code, len); + break; + + case OP_CCLASS_MIX: + case OP_CCLASS_MIX_NOT: + n = bitset_on_num((BitSetRef )bp); + bp += SIZE_BITSET; + GET_LENGTH_INC(len, bp); + q = bp; +#ifndef PLATFORM_UNALIGNED_WORD_ACCESS + ALIGNMENT_RIGHT(q); +#endif + GET_CODE_POINT(code, q); + bp += len; + fprintf(f, ":%d:%d:%d", n, (int )code, len); + break; + + case OP_CCLASS_NODE: + { + CClassNode *cc; + + GET_POINTER_INC(cc, bp); + n = bitset_on_num(cc->bs); + fprintf(f, ":%u:%d", (unsigned int )cc, n); + } + break; + + case OP_BACKREFN_IC: + 
mem = *((MemNumType* )bp); + bp += SIZE_MEMNUM; + fprintf(f, ":%d", mem); + break; + + case OP_BACKREF_MULTI_IC: + case OP_BACKREF_MULTI: + fputs(" ", f); + GET_LENGTH_INC(len, bp); + for (i = 0; i < len; i++) { + GET_MEMNUM_INC(mem, bp); + if (i > 0) fputs(", ", f); + fprintf(f, "%d", mem); + } + break; + + case OP_REPEAT: + case OP_REPEAT_NG: + { + mem = *((MemNumType* )bp); + bp += SIZE_MEMNUM; + addr = *((RelAddrType* )bp); + bp += SIZE_RELADDR; + fprintf(f, ":%d:%d", mem, addr); + } + break; + + case OP_PUSH_OR_JUMP_EXACT1: + case OP_PUSH_IF_PEEK_NEXT: + addr = *((RelAddrType* )bp); + bp += SIZE_RELADDR; + fprintf(f, ":(%d)", addr); + p_string(f, 1, bp); + bp += 1; + break; + + case OP_LOOK_BEHIND: + GET_LENGTH_INC(len, bp); + fprintf(f, ":%d", len); + break; + + case OP_PUSH_LOOK_BEHIND_NOT: + GET_RELADDR_INC(addr, bp); + GET_LENGTH_INC(len, bp); + fprintf(f, ":%d:(%d)", len, addr); + break; + + default: + fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n", + *--bp); + } + } + fputs("]", f); + if (nextp) *nextp = bp; +} + +static void +print_compiled_byte_code_list(FILE* f, regex_t* reg) +{ + int ncode; + UChar* bp = reg->p; + UChar* end = reg->p + reg->used; + + fprintf(f, "code length: %d\n", reg->used); + + ncode = 0; + while (bp < end) { + ncode++; + if (bp > reg->p) { + if (ncode % 5 == 0) + fprintf(f, "\n"); + else + fputs(" ", f); + } + onig_print_compiled_byte_code(f, bp, &bp, reg->enc); + } + + fprintf(f, "\n"); +} + +static void +print_indent_tree(FILE* f, Node* node, int indent) +{ + int i, type; + int add = 3; + UChar* p; + + Indent(f, indent); + if (IS_NULL(node)) { + fprintf(f, "ERROR: null node!!!\n"); + exit (0); + } + + type = NTYPE(node); + switch (type) { + case N_LIST: + case N_ALT: + if (NTYPE(node) == N_LIST) + fprintf(f, "<list:%x>\n", (int )node); + else + fprintf(f, "<alt:%x>\n", (int )node); + + print_indent_tree(f, NCONS(node).left, indent + add); + while (IS_NOT_NULL(node = NCONS(node).right)) { + if (NTYPE(node) 
!= type) { + fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node)); + exit(0); + } + print_indent_tree(f, NCONS(node).left, indent + add); + } + break; + + case N_STRING: + fprintf(f, "<string%s:%x>", + (NSTRING_IS_RAW(node) ? "-raw" : ""), (int )node); + for (p = NSTRING(node).s; p < NSTRING(node).end; p++) { + if (*p >= 0x20 && *p < 0x7f) + fputc(*p, f); + else { + fprintf(f, " 0x%02x", *p); + } + } + break; + + case N_CCLASS: + fprintf(f, "<cclass:%x>", (int )node); + if (IS_CCLASS_NOT(&NCCLASS(node))) fputs(" not", f); + if (NCCLASS(node).mbuf) { + BBuf* bbuf = NCCLASS(node).mbuf; + for (i = 0; i < bbuf->used; i++) { + if (i > 0) fprintf(f, ","); + fprintf(f, "%0x", bbuf->p[i]); + } + } + break; + + case N_CTYPE: + fprintf(f, "<ctype:%x> ", (int )node); + switch (NCTYPE(node).type) { + case CTYPE_WORD: fputs("word", f); break; + case CTYPE_NOT_WORD: fputs("not word", f); break; + default: + fprintf(f, "ERROR: undefined ctype.\n"); + exit(0); + } + break; + + case N_ANYCHAR: + fprintf(f, "<anychar:%x>", (int )node); + break; + + case N_ANCHOR: + fprintf(f, "<anchor:%x> ", (int )node); + switch (NANCHOR(node).type) { + case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break; + case ANCHOR_END_BUF: fputs("end buf", f); break; + case ANCHOR_BEGIN_LINE: fputs("begin line", f); break; + case ANCHOR_END_LINE: fputs("end line", f); break; + case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break; + case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break; + + case ANCHOR_WORD_BOUND: fputs("word bound", f); break; + case ANCHOR_NOT_WORD_BOUND: fputs("not word bound", f); break; +#ifdef USE_WORD_BEGIN_END + case ANCHOR_WORD_BEGIN: fputs("word begin", f); break; + case ANCHOR_WORD_END: fputs("word end", f); break; +#endif + case ANCHOR_PREC_READ: fputs("prec read", f); break; + case ANCHOR_PREC_READ_NOT: fputs("prec read not", f); break; + case ANCHOR_LOOK_BEHIND: fputs("look_behind", f); break; + case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); 
break; + + default: + fprintf(f, "ERROR: undefined anchor type.\n"); + break; + } + break; + + case N_BACKREF: + { + int* p; + BackrefNode* br = &(NBACKREF(node)); + p = BACKREFS_P(br); + fprintf(f, "<backref:%x>", (int )node); + for (i = 0; i < br->back_num; i++) { + if (i > 0) fputs(", ", f); + fprintf(f, "%d", p[i]); + } + } + break; + +#ifdef USE_SUBEXP_CALL + case N_CALL: + { + CallNode* cn = &(NCALL(node)); + fprintf(f, "<call:%x>", (int )node); + p_string(f, cn->name_end - cn->name, cn->name); + } + break; +#endif + + case N_QUALIFIER: + fprintf(f, "<qualifier:%x>{%d,%d}%s\n", (int )node, + NQUALIFIER(node).lower, NQUALIFIER(node).upper, + (NQUALIFIER(node).greedy ? "" : "?")); + print_indent_tree(f, NQUALIFIER(node).target, indent + add); + break; + + case N_EFFECT: + fprintf(f, "<effect:%x> ", (int )node); + switch (NEFFECT(node).type) { + case EFFECT_OPTION: + fprintf(f, "option:%d\n", NEFFECT(node).option); + print_indent_tree(f, NEFFECT(node).target, indent + add); + break; + case EFFECT_MEMORY: + fprintf(f, "memory:%d", NEFFECT(node).regnum); + break; + case EFFECT_STOP_BACKTRACK: + fprintf(f, "stop-bt"); + break; + + default: + break; + } + fprintf(f, "\n"); + print_indent_tree(f, NEFFECT(node).target, indent + add); + break; + + default: + fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node)); + break; + } + + if (type != N_LIST && type != N_ALT && type != N_QUALIFIER && + type != N_EFFECT) + fprintf(f, "\n"); + fflush(f); +} +#endif /* ONIG_DEBUG */ + +#ifdef ONIG_DEBUG_PARSE_TREE +static void +print_tree(FILE* f, Node* node) +{ + print_indent_tree(f, node, 0); +} +#endif +/********************************************************************** + regenc.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regint.h" + +OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT; + +extern int +onigenc_init() +{ + return 0; +} + +extern OnigEncoding +onigenc_get_default_encoding() +{ + return OnigEncDefaultCharEncoding; +} + +extern int +onigenc_set_default_encoding(OnigEncoding enc) +{ + OnigEncDefaultCharEncoding = enc; + return 0; +} + +extern UChar* +onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s) +{ + UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s); + if (p < s) { + p += enc_len(enc, p); + } + return p; +} + +extern UChar* +onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc, + const UChar* start, const UChar* s, const UChar** prev) +{ + UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s); + + if (p < s) { + if (prev) *prev = (const UChar* )p; + p += enc_len(enc, p); + } + else { + if (prev) *prev = (const UChar* )NULL; /* Sorry */ + } + return p; +} + +extern UChar* +onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s) +{ + if (s <= start) + return (UChar* )NULL; + + return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1); +} + +extern UChar* +onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, int n) +{ + while (ONIG_IS_NOT_NULL(s) && n-- > 0) { + if (s <= start) + return (UChar* )NULL; + + s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1); + } + return (UChar* )s; +} + +extern UChar* +onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n) +{ + UChar* q = (UChar* )p; + while (n-- > 0) { + q += ONIGENC_MBC_ENC_LEN(enc, q); + } + return (q <= end ? 
q : NULL); +} + +extern int +onigenc_strlen(OnigEncoding enc, const UChar* p, const UChar* end) +{ + int n = 0; + UChar* q = (UChar* )p; + + while (q < end) { + q += ONIGENC_MBC_ENC_LEN(enc, q); + n++; + } + return n; +} + +extern int +onigenc_strlen_null(OnigEncoding enc, const UChar* s) +{ + int n = 0; + UChar* p = (UChar* )s; + + while (1) { + if (*p == '\0') { + UChar* q; + int len = ONIGENC_MBC_MINLEN(enc); + + if (len == 1) return n; + q = p + 1; + while (len > 1) { + if (*q != '\0') break; + q++; + len--; + } + if (len == 1) return n; + } + p += ONIGENC_MBC_ENC_LEN(enc, p); + n++; + } +} + +extern int +onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s) +{ + UChar* start = (UChar* )s; + UChar* p = (UChar* )s; + + while (1) { + if (*p == '\0') { + UChar* q; + int len = ONIGENC_MBC_MINLEN(enc); + + if (len == 1) return (int )(p - start); + q = p + 1; + while (len > 1) { + if (*q != '\0') break; + q++; + len--; + } + if (len == 1) return (int )(p - start); + } + p += ONIGENC_MBC_ENC_LEN(enc, p); + } +} + +#ifndef ONIG_RUBY_M17N + +#ifndef NOT_RUBY + +#define USE_APPLICATION_TO_LOWER_CASE_TABLE + +unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[256] = { + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2008, 0x228c, 0x2289, 0x2288, 0x2288, 0x2288, 0x2008, 0x2008, + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, + 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, + 0x21a0, 0x38e2, 
0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0288, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x10e2, 0x01a0, 0x00a0, 0x00a8, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x10e2, 0x00a0, 0x01a0, + 0x00a0, 0x10a0, 0x10e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0, + 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0, + 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2 +}; +#endif + +const UChar* OnigEncAsciiToLowerCaseTable = (const UChar* )0; + +#ifndef USE_APPLICATION_TO_LOWER_CASE_TABLE +static const UChar BuiltInAsciiToLowerCaseTable[] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', 
'\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', + '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', + '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', + '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', + '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', + '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', + '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377', +}; +#endif /* not USE_APPLICATION_TO_LOWER_CASE_TABLE */ + +#ifdef USE_UPPER_CASE_TABLE +UChar OnigEncAsciiToUpperCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', 
'\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107', + '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117', + '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127', + '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137', + '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107', + '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117', + '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127', + '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', + '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', + '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', + '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', + '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', + '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', + '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377', +}; +#endif + +unsigned short OnigEncAsciiCtypeTable[256] = { + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2008, 0x220c, 0x2209, 0x2208, 
0x2208, 0x2208, 0x2008, 0x2008, + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, + 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, + 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, + 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, + + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000 +}; + +UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', + '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', + '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', 
'\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' +}; + +#ifdef USE_UPPER_CASE_TABLE +UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107', + '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117', + '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127', + '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137', + '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107', + '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117', + '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127', + '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', + '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', + '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', + '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', + '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', + '\320', 
'\321', '\322', '\323', '\324', '\325', '\326', '\327', + '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', + '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', + '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', + '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\367', + '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\377', +}; +#endif + +extern void +onigenc_set_default_caseconv_table(const UChar* table) +{ + if (table == (const UChar* )0) { +#ifndef USE_APPLICATION_TO_LOWER_CASE_TABLE + table = BuiltInAsciiToLowerCaseTable; +#else + return ; +#endif + } + + if (table != OnigEncAsciiToLowerCaseTable) { + OnigEncAsciiToLowerCaseTable = table; + } +} + +extern UChar* +onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s) +{ + return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s); +} + +OnigPairAmbigCodes OnigAsciiPairAmbigCodes[] = { + { 0x41, 0x61 }, + { 0x42, 0x62 }, + { 0x43, 0x63 }, + { 0x44, 0x64 }, + { 0x45, 0x65 }, + { 0x46, 0x66 }, + { 0x47, 0x67 }, + { 0x48, 0x68 }, + { 0x49, 0x69 }, + { 0x4a, 0x6a }, + { 0x4b, 0x6b }, + { 0x4c, 0x6c }, + { 0x4d, 0x6d }, + { 0x4e, 0x6e }, + { 0x4f, 0x6f }, + { 0x50, 0x70 }, + { 0x51, 0x71 }, + { 0x52, 0x72 }, + { 0x53, 0x73 }, + { 0x54, 0x74 }, + { 0x55, 0x75 }, + { 0x56, 0x76 }, + { 0x57, 0x77 }, + { 0x58, 0x78 }, + { 0x59, 0x79 }, + { 0x5a, 0x7a }, + + { 0x61, 0x41 }, + { 0x62, 0x42 }, + { 0x63, 0x43 }, + { 0x64, 0x44 }, + { 0x65, 0x45 }, + { 0x66, 0x46 }, + { 0x67, 0x47 }, + { 0x68, 0x48 }, + { 0x69, 0x49 }, + { 0x6a, 0x4a }, + { 0x6b, 0x4b }, + { 0x6c, 0x4c }, + { 0x6d, 0x4d }, + { 0x6e, 0x4e }, + { 0x6f, 0x4f }, + { 0x70, 0x50 }, + { 0x71, 0x51 }, + { 0x72, 0x52 }, + { 0x73, 0x53 }, + { 0x74, 0x54 }, + { 0x75, 0x55 }, + { 0x76, 0x56 }, + { 0x77, 0x57 }, + { 0x78, 0x58 }, + { 0x79, 0x59 }, + { 0x7a, 0x5a } +}; + +extern int +onigenc_ascii_get_all_pair_ambig_codes(OnigAmbigType flag, + OnigPairAmbigCodes** ccs) +{ + if (flag == 
ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) { + *ccs = OnigAsciiPairAmbigCodes; + return (sizeof(OnigAsciiPairAmbigCodes) / sizeof(OnigPairAmbigCodes)); + } + else { + return 0; + } +} + +extern int +onigenc_nothing_get_all_comp_ambig_codes(OnigAmbigType flag, + OnigCompAmbigCodes** ccs) +{ + return 0; +} + +extern int +onigenc_iso_8859_1_get_all_pair_ambig_codes(OnigAmbigType flag, + OnigPairAmbigCodes** ccs) +{ + static OnigPairAmbigCodes cc[] = { + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe }, + + { 0xe0, 0xc0 }, + { 0xe1, 0xc1 }, + { 0xe2, 0xc2 }, + { 0xe3, 0xc3 }, + { 0xe4, 0xc4 }, + { 0xe5, 0xc5 }, + { 0xe6, 0xc6 }, + { 0xe7, 0xc7 }, + { 0xe8, 0xc8 }, + { 0xe9, 0xc9 }, + { 0xea, 0xca }, + { 0xeb, 0xcb }, + { 0xec, 0xcc }, + { 0xed, 0xcd }, + { 0xee, 0xce }, + { 0xef, 0xcf }, + + { 0xf0, 0xd0 }, + { 0xf1, 0xd1 }, + { 0xf2, 0xd2 }, + { 0xf3, 0xd3 }, + { 0xf4, 0xd4 }, + { 0xf5, 0xd5 }, + { 0xf6, 0xd6 }, + { 0xf8, 0xd8 }, + { 0xf9, 0xd9 }, + { 0xfa, 0xda }, + { 0xfb, 0xdb }, + { 0xfc, 0xdc }, + { 0xfd, 0xdd }, + { 0xfe, 0xde } + }; + + if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) { + *ccs = OnigAsciiPairAmbigCodes; + return (sizeof(OnigAsciiPairAmbigCodes) / sizeof(OnigPairAmbigCodes)); + } + else if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) { + *ccs = cc; + return sizeof(cc) / sizeof(OnigPairAmbigCodes); + } + else + return 0; +} + +extern int +onigenc_ess_tsett_get_all_comp_ambig_codes(OnigAmbigType flag, + OnigCompAmbigCodes** ccs) +{ + static OnigCompAmbigCodes folds[] = { 
+ { 2, 0xdf, {{ 2, { 0x53, 0x53 } }, { 2, { 0x73, 0x73} } } } + }; + + if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) { + *ccs = folds; + return sizeof(folds) / sizeof(OnigCompAmbigCodes); + } + else + return 0; +} + +extern int +onigenc_not_support_get_ctype_code_range(int ctype, + OnigCodePoint* sbr[], OnigCodePoint* mbr[]) +{ + return ONIG_NO_SUPPORT_CONFIG; +} + +extern int +onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end) +{ + if (p < end) { + if (*p == 0x0a) return 1; + } + return 0; +} + +/* for single byte encodings */ +extern int +onigenc_ascii_mbc_to_normalize(OnigAmbigType flag, const UChar** p, const UChar*end, + UChar* lower) +{ + if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) { + *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p); + } + else { + *lower = **p; + } + + (*p)++; + return 1; /* return byte length of converted char to lower */ +} + +extern int +onigenc_ascii_is_mbc_ambiguous(OnigAmbigType flag, + const UChar** pp, const UChar* end) +{ + const UChar* p = *pp; + + (*pp)++; + if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) { + return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p); + } + else { + return FALSE; + } +} + +extern int +onigenc_single_byte_mbc_enc_len(const UChar* p) +{ + return 1; +} + +extern OnigCodePoint +onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end) +{ + return (OnigCodePoint )(*p); +} + +extern int +onigenc_single_byte_code_to_mbclen(OnigCodePoint code) +{ + return 1; +} + +extern int +onigenc_single_byte_code_to_mbc_first(OnigCodePoint code) +{ + return (code & 0xff); +} + +extern int +onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf) +{ + *buf = (UChar )(code & 0xff); + return 1; +} + +extern UChar* +onigenc_single_byte_left_adjust_char_head(const UChar* start, const UChar* s) +{ + return (UChar* )s; +} + +extern int +onigenc_always_true_is_allowed_reverse_match(const UChar* s, const UChar* end) +{ + return TRUE; +} + +extern int 
+onigenc_always_false_is_allowed_reverse_match(const UChar* s, const UChar* end) +{ + return FALSE; +} + +extern OnigCodePoint +onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end) +{ + int c, i, len; + OnigCodePoint n; + + len = enc_len(enc, p); + n = (OnigCodePoint )(*p++); + if (len == 1) return n; + + for (i = 1; i < len; i++) { + if (p >= end) break; + c = *p++; + n <<= 8; n += c; + } + return n; +} + +extern int +onigenc_mbn_mbc_to_normalize(OnigEncoding enc, OnigAmbigType flag, + const UChar** pp, const UChar* end, UChar* lower) +{ + int len; + const UChar *p = *pp; + + if (ONIGENC_IS_MBC_ASCII(p)) { + if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) { + *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); + } + else { + *lower = *p; + } + (*pp)++; + return 1; + } + else { + len = enc_len(enc, p); + if (lower != p) { + int i; + for (i = 0; i < len; i++) { + *lower++ = *p++; + } + } + (*pp) += len; + return len; /* return byte length of converted to lower char */ + } +} + +extern int +onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigAmbigType flag, + const UChar** pp, const UChar* end) +{ + const UChar* p = *pp; + + if (ONIGENC_IS_MBC_ASCII(p)) { + (*pp)++; + if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) { + return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p); + } + else { + return FALSE; + } + } + + (*pp) += enc_len(enc, p); + return FALSE; +} + +extern int +onigenc_mb2_code_to_mbclen(OnigCodePoint code) +{ + if ((code & 0xff00) != 0) return 2; + else return 1; +} + +extern int +onigenc_mb4_code_to_mbclen(OnigCodePoint code) +{ + if ((code & 0xff000000) != 0) return 4; + else if ((code & 0xff0000) != 0) return 3; + else if ((code & 0xff00) != 0) return 2; + else return 1; +} + +extern int +onigenc_mb2_code_to_mbc_first(OnigCodePoint code) +{ + int first; + + if ((code & 0xff00) != 0) { + first = (code >> 8) & 0xff; + } + else { + return (int )code; + } + return first; +} + +extern int +onigenc_mb4_code_to_mbc_first(OnigCodePoint code) 
+{ + int first; + + if ((code & 0xff000000) != 0) { + first = (code >> 24) & 0xff; + } + else if ((code & 0xff0000) != 0) { + first = (code >> 16) & 0xff; + } + else if ((code & 0xff00) != 0) { + first = (code >> 8) & 0xff; + } + else { + return (int )code; + } + return first; +} + +extern int +onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf) +{ + UChar *p = buf; + + if ((code & 0xff00) != 0) { + *p++ = (UChar )((code >> 8) & 0xff); + } + *p++ = (UChar )(code & 0xff); + +#if 1 + if (enc_len(enc, buf) != (p - buf)) + return ONIGENCERR_INVALID_WIDE_CHAR_VALUE; +#endif + return p - buf; +} + +extern int +onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf) +{ + UChar *p = buf; + + if ((code & 0xff000000) != 0) { + *p++ = (UChar )((code >> 24) & 0xff); + } + if ((code & 0xff0000) != 0) { + *p++ = (UChar )((code >> 16) & 0xff); + } + if ((code & 0xff00) != 0) { + *p++ = (UChar )((code >> 8) & 0xff); + } + *p++ = (UChar )(code & 0xff); + +#if 1 + if (enc_len(enc, buf) != (p - buf)) + return ONIGENCERR_INVALID_WIDE_CHAR_VALUE; +#endif + return p - buf; +} + +extern int +onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code, + unsigned int ctype) +{ + if ((ctype & ONIGENC_CTYPE_WORD) != 0) { + if (code < 128) + return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); + else + return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE); + + ctype &= ~ONIGENC_CTYPE_WORD; + if (ctype == 0) return FALSE; + } + + if (code < 128) + return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); + else + return FALSE; +} + +extern int +onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code, + unsigned int ctype) +{ + if ((ctype & ONIGENC_CTYPE_WORD) != 0) { + if (code < 128) + return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); + else + return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? 
TRUE : FALSE); + + ctype &= ~ONIGENC_CTYPE_WORD; + if (ctype == 0) return FALSE; + } + + if (code < 128) + return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); + else + return FALSE; +} + +extern int +onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end, + const UChar* sascii /* ascii */, int n) +{ + int x, c; + + while (n-- > 0) { + if (p >= end) return (int )(*sascii); + + c = (int )ONIGENC_MBC_TO_CODE(enc, p, end); + x = *sascii - c; + if (x) return x; + + sascii++; + p += enc_len(enc, p); + } + return 0; +} + +#else /* ONIG_RUBY_M17N */ + +extern int +onigenc_is_code_ctype(OnigEncoding enc, OnigCodePoint code, int ctype) +{ + switch (ctype) { + case ONIGENC_CTYPE_NEWLINE: + if (code == 0x0a) return 1; + break; + + case ONIGENC_CTYPE_ALPHA: + return m17n_isalpha(enc, code); + break; + case ONIGENC_CTYPE_BLANK: + return ONIGENC_IS_CODE_BLANK(enc, (int )(code)); + break; + case ONIGENC_CTYPE_CNTRL: + return m17n_iscntrl(enc, code); + break; + case ONIGENC_CTYPE_DIGIT: + return m17n_isdigit(enc, code); + break; + case ONIGENC_CTYPE_GRAPH: + return ONIGENC_IS_CODE_GRAPH(enc, (int )(code)); + break; + case ONIGENC_CTYPE_LOWER: + return m17n_islower(enc, code); + break; + case ONIGENC_CTYPE_PRINT: + return m17n_isprint(enc, code); + break; + case ONIGENC_CTYPE_PUNCT: + return m17n_ispunct(enc, code); + break; + case ONIGENC_CTYPE_SPACE: + return m17n_isspace(enc, code); + break; + case ONIGENC_CTYPE_UPPER: + return m17n_isupper(enc, code); + break; + case ONIGENC_CTYPE_XDIGIT: + return m17n_isxdigit(enc, code); + break; + case ONIGENC_CTYPE_WORD: + return m17n_iswchar(enc, code); + break; + case ONIGENC_CTYPE_ASCII: + return (code < 128 ? 
TRUE : FALSE); + break; + case ONIGENC_CTYPE_ALNUM: + return m17n_isalnum(enc, code); + break; + default: + break; + } + + return 0; +} + +extern int +onigenc_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf) +{ + int c, len; + + m17n_mbcput(enc, code, buf); + c = m17n_firstbyte(enc, code); + len = enc_len(enc, c); + return len; +} + +extern int +onigenc_mbc_to_lower(OnigEncoding enc, UChar* p, UChar* buf) +{ + unsigned int c, low; + + c = m17n_codepoint(enc, p, p + enc_len(enc, *p)); + low = m17n_tolower(enc, c); + m17n_mbcput(enc, low, buf); + + return m17n_codelen(enc, low); +} + +extern int +onigenc_is_mbc_ambiguous(OnigEncoding enc, OnigAmbigType flag, + UChar** pp, UChar* end) +{ + int len; + unsigned int c; + UChar* p = *pp; + + len = enc_len(enc, *p); + (*pp) += len; + c = m17n_codepoint(enc, p, p + len); + + if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) { + if (m17n_isupper(enc, c) || m17n_islower(enc, c)) + return TRUE; + } + + return FALSE; +} + +extern UChar* +onigenc_get_left_adjust_char_head(OnigEncoding enc, UChar* start, UChar* s) +{ + UChar *p; + int len; + + if (s <= start) return s; + p = s; + + while (!m17n_islead(enc, *p) && p > start) p--; + while (p + (len = enc_len(enc, *p)) < s) { + p += len; + } + if (p + len == s) return s; + return p; +} + +extern int +onigenc_is_allowed_reverse_match(OnigEncoding enc, + const UChar* s, const UChar* end) +{ + return ONIGENC_IS_SINGLEBYTE(enc); +} + +extern void +onigenc_set_default_caseconv_table(UChar* table) { } + +#endif /* ONIG_RUBY_M17N */ +/********************************************************************** + regerror.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regint.h" +#include <stdio.h> /* for vsnprintf() */ + +#ifdef HAVE_STDARG_PROTOTYPES +#include <stdarg.h> +#define va_init_list(a,b) va_start(a,b) +#else +#include <varargs.h> +#define va_init_list(a,b) va_start(a) +#endif + +extern char* +onig_error_code_to_format(int code) +{ + char *p; + + if (code >= 0) return (char* )0; + + switch (code) { + case ONIG_MISMATCH: + p = "mismatch"; break; + case ONIG_NO_SUPPORT_CONFIG: + p = "no support in this configuration"; break; + case ONIGERR_MEMORY: + p = "fail to memory allocation"; break; + case ONIGERR_MATCH_STACK_LIMIT_OVER: + p = "match-stack limit over"; break; + case ONIGERR_TYPE_BUG: + p = "undefined type (bug)"; break; + case ONIGERR_PARSER_BUG: + p = "internal parser error (bug)"; break; + case ONIGERR_STACK_BUG: + p = "stack error (bug)"; break; + case ONIGERR_UNDEFINED_BYTECODE: + p = "undefined bytecode (bug)"; break; + case ONIGERR_UNEXPECTED_BYTECODE: + p = "unexpected bytecode (bug)"; break; + case ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED: + p = "default multibyte-encoding is not setted"; break; + case ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR: + p = "can't convert to wide-char on specified multibyte-encoding"; break; + case ONIGERR_INVALID_ARGUMENT: + p = "invalid argument"; break; + case ONIGERR_END_PATTERN_AT_LEFT_BRACE: + p = "end pattern at left brace"; break; + case ONIGERR_END_PATTERN_AT_LEFT_BRACKET: + p = "end pattern at left bracket"; break; + case ONIGERR_EMPTY_CHAR_CLASS: + p = "empty char-class"; break; + case ONIGERR_PREMATURE_END_OF_CHAR_CLASS: + p = "premature end of char-class"; break; + case ONIGERR_END_PATTERN_AT_ESCAPE: + p = "end pattern at escape"; break; + case ONIGERR_END_PATTERN_AT_META: + p = "end pattern at meta"; break; + case ONIGERR_END_PATTERN_AT_CONTROL: + p = "end pattern at control"; break; + case ONIGERR_META_CODE_SYNTAX: + p = "illegal meta-code syntax"; break; + case ONIGERR_CONTROL_CODE_SYNTAX: + p = "illegal control-code syntax"; break; + 
case ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE: + p = "char-class value at end of range"; break; + case ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE: + p = "char-class value at start of range"; break; + case ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS: + p = "unmatched range specifier in char-class"; break; + case ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED: + p = "target of repeat operator is not specified"; break; + case ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID: + p = "target of repeat operator is invalid"; break; + case ONIGERR_NESTED_REPEAT_OPERATOR: + p = "nested repeat operator"; break; + case ONIGERR_UNMATCHED_CLOSE_PARENTHESIS: + p = "unmatched close parenthesis"; break; + case ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS: + p = "end pattern with unmatched parenthesis"; break; + case ONIGERR_END_PATTERN_IN_GROUP: + p = "end pattern in group"; break; + case ONIGERR_UNDEFINED_GROUP_OPTION: + p = "undefined group option"; break; + case ONIGERR_INVALID_POSIX_BRACKET_TYPE: + p = "invalid POSIX bracket type"; break; + case ONIGERR_INVALID_LOOK_BEHIND_PATTERN: + p = "invalid pattern in look-behind"; break; + case ONIGERR_INVALID_REPEAT_RANGE_PATTERN: + p = "invalid repeat range {lower,upper}"; break; + case ONIGERR_TOO_BIG_NUMBER: + p = "too big number"; break; + case ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE: + p = "too big number for repeat range"; break; + case ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE: + p = "upper is smaller than lower in repeat range"; break; + case ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS: + p = "empty range in char class"; break; + case ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE: + p = "mismatch multibyte code length in char-class range"; break; + case ONIGERR_TOO_MANY_MULTI_BYTE_RANGES: + p = "too many multibyte code ranges are specified"; break; + case ONIGERR_TOO_SHORT_MULTI_BYTE_STRING: + p = "too short multibyte code string"; break; + case ONIGERR_TOO_BIG_BACKREF_NUMBER: + p = "too big backref number"; break; + case 
ONIGERR_INVALID_BACKREF: +#ifdef USE_NAMED_GROUP + p = "invalid backref number/name"; break; +#else + p = "invalid backref number"; break; +#endif + case ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED: + p = "numbered backref/call is not allowed. (use name)"; break; + case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE: + p = "too big wide-char value"; break; + case ONIGERR_TOO_LONG_WIDE_CHAR_VALUE: + p = "too long wide-char value"; break; + case ONIGERR_INVALID_WIDE_CHAR_VALUE: + p = "invalid wide-char value"; break; + case ONIGERR_EMPTY_GROUP_NAME: + p = "group name is empty"; break; + case ONIGERR_INVALID_GROUP_NAME: + p = "invalid group name <%n>"; break; + case ONIGERR_INVALID_CHAR_IN_GROUP_NAME: +#ifdef USE_NAMED_GROUP + p = "invalid char in group name <%n>"; break; +#else + p = "invalid char in group number <%n>"; break; +#endif + case ONIGERR_UNDEFINED_NAME_REFERENCE: + p = "undefined name <%n> reference"; break; + case ONIGERR_UNDEFINED_GROUP_REFERENCE: + p = "undefined group <%n> reference"; break; + case ONIGERR_MULTIPLEX_DEFINED_NAME: + p = "multiplex defined name <%n>"; break; + case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL: + p = "multiplex definition name <%n> call"; break; + case ONIGERR_NEVER_ENDING_RECURSION: + p = "never ending recursion"; break; + case ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY: + p = "group number is too big for capture history"; break; + case ONIGERR_INVALID_CHAR_PROPERTY_NAME: + p = "invalid character property name {%n}"; break; + case ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION: + p = "not supported encoding combination"; break; + case ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT: + p = "over thread pass limit count"; break; + + default: + p = "undefined error code"; break; + } + + return p; +} + + +/* for ONIG_MAX_ERROR_MESSAGE_LEN */ +#define MAX_ERROR_PAR_LEN 30 + +extern int +#ifdef HAVE_STDARG_PROTOTYPES +onig_error_code_to_str(UChar* s, int code, ...) 
+#else +onig_error_code_to_str(s, code, va_alist) + UChar* s; + int code; + va_dcl +#endif +{ + UChar *p, *q; + OnigErrorInfo* einfo; + int len; + va_list vargs; + + va_init_list(vargs, code); + + switch (code) { + case ONIGERR_UNDEFINED_NAME_REFERENCE: + case ONIGERR_UNDEFINED_GROUP_REFERENCE: + case ONIGERR_MULTIPLEX_DEFINED_NAME: + case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL: + case ONIGERR_INVALID_GROUP_NAME: + case ONIGERR_INVALID_CHAR_IN_GROUP_NAME: + case ONIGERR_INVALID_CHAR_PROPERTY_NAME: + einfo = va_arg(vargs, OnigErrorInfo*); + len = einfo->par_end - einfo->par; + q = onig_error_code_to_format(code); + p = s; + while (*q != '\0') { + if (*q == '%') { + q++; + if (*q == 'n') { /* '%n': name */ + if (len > MAX_ERROR_PAR_LEN) { + xmemcpy(p, einfo->par, MAX_ERROR_PAR_LEN - 3); + p += (MAX_ERROR_PAR_LEN - 3); + xmemcpy(p, "...", 3); + p += 3; + } + else { + xmemcpy(p, einfo->par, len); + p += len; + } + q++; + } + else + goto normal_char; + } + else { + normal_char: + *p++ = *q++; + } + } + *p = '\0'; + len = p - s; + break; + + default: + q = onig_error_code_to_format(code); + len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, q); + xmemcpy(s, q, len); + s[len] = '\0'; + break; + } + + va_end(vargs); + return len; +} + + +void +#ifdef HAVE_STDARG_PROTOTYPES +onig_snprintf_with_pattern(char buf[], int bufsize, OnigEncoding enc, + char* pat, char* pat_end, char *fmt, ...) 
+#else +onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist) + char buf[]; + int bufsize; + OnigEncoding enc; + char* pat; + char* pat_end; + const char *fmt; + va_dcl +#endif +{ + int n, need, len; + UChar *p, *s, *bp; + char bs[6]; + va_list args; + + va_init_list(args, fmt); + n = vsnprintf(buf, bufsize, fmt, args); + va_end(args); + + need = (pat_end - pat) * 4 + 4; + + if (n + need < bufsize) { + strcat(buf, ": /"); + s = buf + onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, buf); + + p = pat; + while (p < (UChar* )pat_end) { + if (*p == MC_ESC(enc)) { + *s++ = *p++; + len = enc_len(enc, p); + while (len-- > 0) *s++ = *p++; + } + else if (*p == '/') { + *s++ = (unsigned char )MC_ESC(enc); + *s++ = *p++; + } + else if (ONIGENC_IS_MBC_HEAD(enc, p)) { + len = enc_len(enc, p); + if (ONIGENC_MBC_MINLEN(enc) == 1) { + while (len-- > 0) *s++ = *p++; + } + else { /* for UTF16 */ + int blen; + + while (len-- > 0) { + sprintf(bs, "\\%03o", *p++ & 0377); + blen = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs); + bp = bs; + while (blen-- > 0) *s++ = *bp++; + } + } + } + else if (!ONIGENC_IS_CODE_PRINT(enc, *p) && + !ONIGENC_IS_CODE_SPACE(enc, *p)) { + sprintf(bs, "\\%03o", *p++ & 0377); + len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs); + bp = bs; + while (len-- > 0) *s++ = *bp++; + } + else { + *s++ = *p++; + } + } + + *s++ = '/'; + *s = '\0'; + } +} +/********************************************************************** + regexec.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regint.h" + +#ifdef USE_CAPTURE_HISTORY +static void history_tree_free(OnigCaptureTreeNode* node); + +static void +history_tree_clear(OnigCaptureTreeNode* node) +{ + int i; + + if (IS_NOT_NULL(node)) { + for (i = 0; i < node->num_childs; i++) { + if (IS_NOT_NULL(node->childs[i])) { + history_tree_free(node->childs[i]); + } + } + for (i = 0; i < node->allocated; i++) { + node->childs[i] = (OnigCaptureTreeNode* )0; + } + node->num_childs = 0; + node->beg = ONIG_REGION_NOTPOS; + node->end = ONIG_REGION_NOTPOS; + node->group = -1; + } +} + +static void +history_tree_free(OnigCaptureTreeNode* node) +{ + history_tree_clear(node); + xfree(node); +} + +static void +history_root_free(OnigRegion* r) +{ + if (IS_NOT_NULL(r->history_root)) { + history_tree_free(r->history_root); + r->history_root = (OnigCaptureTreeNode* )0; + } +} + +static OnigCaptureTreeNode* +history_node_new() +{ + OnigCaptureTreeNode* node; + + node = (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode)); 
+ CHECK_NULL_RETURN(node); + node->childs = (OnigCaptureTreeNode** )0; + node->allocated = 0; + node->num_childs = 0; + node->group = -1; + node->beg = ONIG_REGION_NOTPOS; + node->end = ONIG_REGION_NOTPOS; + + return node; +} + +static int +history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child) +{ +#define HISTORY_TREE_INIT_ALLOC_SIZE 8 + + if (parent->num_childs >= parent->allocated) { + int n, i; + + if (IS_NULL(parent->childs)) { + n = HISTORY_TREE_INIT_ALLOC_SIZE; + parent->childs = + (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n); + } + else { + n = parent->allocated * 2; + parent->childs = + (OnigCaptureTreeNode** )xrealloc(parent->childs, + sizeof(OnigCaptureTreeNode*) * n); + } + CHECK_NULL_RETURN_VAL(parent->childs, ONIGERR_MEMORY); + for (i = parent->allocated; i < n; i++) { + parent->childs[i] = (OnigCaptureTreeNode* )0; + } + parent->allocated = n; + } + + parent->childs[parent->num_childs] = child; + parent->num_childs++; + return 0; +} + +static OnigCaptureTreeNode* +history_tree_clone(OnigCaptureTreeNode* node) +{ + int i; + OnigCaptureTreeNode *clone, *child; + + clone = history_node_new(); + CHECK_NULL_RETURN(clone); + + clone->beg = node->beg; + clone->end = node->end; + for (i = 0; i < node->num_childs; i++) { + child = history_tree_clone(node->childs[i]); + if (IS_NULL(child)) { + history_tree_free(clone); + return (OnigCaptureTreeNode* )0; + } + history_tree_add_child(clone, child); + } + + return clone; +} + +extern OnigCaptureTreeNode* +onig_get_capture_tree(OnigRegion* region) +{ + return region->history_root; +} +#endif /* USE_CAPTURE_HISTORY */ + +extern void +onig_region_clear(OnigRegion* region) +{ + int i; + + for (i = 0; i < region->num_regs; i++) { + region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS; + } +#ifdef USE_CAPTURE_HISTORY + history_root_free(region); +#endif +} + +extern int +onig_region_resize(OnigRegion* region, int n) +{ + region->num_regs = n; + + if (n < ONIG_NREGION) + n = 
ONIG_NREGION; + + if (region->allocated == 0) { + region->beg = (int* )xmalloc(n * sizeof(int)); + region->end = (int* )xmalloc(n * sizeof(int)); + + if (region->beg == 0 || region->end == 0) + return ONIGERR_MEMORY; + + region->allocated = n; + } + else if (region->allocated < n) { + region->beg = (int* )xrealloc(region->beg, n * sizeof(int)); + region->end = (int* )xrealloc(region->end, n * sizeof(int)); + + if (region->beg == 0 || region->end == 0) + return ONIGERR_MEMORY; + + region->allocated = n; + } + + return 0; +} + +extern int +onig_region_resize_clear(OnigRegion* region, int n) +{ + int r; + + r = onig_region_resize(region, n); + if (r != 0) return r; + onig_region_clear(region); + return 0; +} + +extern int +onig_region_set(OnigRegion* region, int at, int beg, int end) +{ + if (at < 0) return ONIGERR_INVALID_ARGUMENT; + + if (at >= region->allocated) { + int r = onig_region_resize(region, at + 1); + if (r < 0) return r; + } + + region->beg[at] = beg; + region->end[at] = end; + return 0; +} + +extern void +onig_region_init(OnigRegion* region) +{ + region->num_regs = 0; + region->allocated = 0; + region->beg = (int* )0; + region->end = (int* )0; + region->history_root = (OnigCaptureTreeNode* )0; +} + +extern OnigRegion* +onig_region_new() +{ + OnigRegion* r; + + r = (OnigRegion* )xmalloc(sizeof(OnigRegion)); + onig_region_init(r); + return r; +} + +extern void +onig_region_free(OnigRegion* r, int free_self) +{ + if (r) { + if (r->allocated > 0) { + if (r->beg) xfree(r->beg); + if (r->end) xfree(r->end); + r->allocated = 0; + } +#ifdef USE_CAPTURE_HISTORY + history_root_free(r); +#endif + if (free_self) xfree(r); + } +} + +extern void +onig_region_copy(OnigRegion* to, OnigRegion* from) +{ +#define RREGC_SIZE (sizeof(int) * from->num_regs) + int i; + + if (to == from) return; + + if (to->allocated == 0) { + if (from->num_regs > 0) { + to->beg = (int* )xmalloc(RREGC_SIZE); + to->end = (int* )xmalloc(RREGC_SIZE); + to->allocated = from->num_regs; + } + } + 
else if (to->allocated < from->num_regs) { + to->beg = (int* )xrealloc(to->beg, RREGC_SIZE); + to->end = (int* )xrealloc(to->end, RREGC_SIZE); + to->allocated = from->num_regs; + } + + for (i = 0; i < from->num_regs; i++) { + to->beg[i] = from->beg[i]; + to->end[i] = from->end[i]; + } + to->num_regs = from->num_regs; + +#ifdef USE_CAPTURE_HISTORY + history_root_free(to); + + if (IS_NOT_NULL(from->history_root)) { + to->history_root = history_tree_clone(from->history_root); + } +#endif +} + + +/** stack **/ +#define INVALID_STACK_INDEX -1 +typedef long StackIndex; + +typedef struct _StackType { + unsigned int type; + union { + struct { + UChar *pcode; /* byte code position */ + UChar *pstr; /* string position */ + UChar *pstr_prev; /* previous char position of pstr */ + } state; + struct { + int count; /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */ + UChar *pcode; /* byte code position (head of repeated target) */ + int num; /* repeat id */ + } repeat; + struct { + StackIndex si; /* index of stack */ + } repeat_inc; + struct { + int num; /* memory num */ + UChar *pstr; /* start/end position */ + /* Following information is setted, if this stack type is MEM-START */ + StackIndex start; /* prev. info (for backtrack "(...)*" ) */ + StackIndex end; /* prev. 
info (for backtrack "(...)*" ) */ + } mem; + struct { + int num; /* null check id */ + UChar *pstr; /* start position */ + } null_check; +#ifdef USE_SUBEXP_CALL + struct { + UChar *ret_addr; /* byte code position */ + int num; /* null check id */ + UChar *pstr; /* string position */ + } call_frame; +#endif + } u; +} StackType; + +/* stack type */ +/* used by normal-POP */ +#define STK_ALT 0x0001 +#define STK_LOOK_BEHIND_NOT 0x0003 +#define STK_POS_NOT 0x0005 +/* avoided by normal-POP, but value should be small */ +#define STK_NULL_CHECK_START 0x0100 +/* handled by normal-POP */ +#define STK_MEM_START 0x0200 +#define STK_MEM_END 0x0300 +#define STK_REPEAT_INC 0x0400 +/* avoided by normal-POP */ +#define STK_POS 0x0500 /* used when POP-POS */ +#define STK_STOP_BT 0x0600 /* mark for "(?>...)" */ +#define STK_REPEAT 0x0700 +#define STK_CALL_FRAME 0x0800 +#define STK_RETURN 0x0900 +#define STK_MEM_END_MARK 0x0a00 +#define STK_VOID 0x0b00 /* for fill a blank */ +#define STK_NULL_CHECK_END 0x0c00 /* for recursive call */ + +/* stack type check mask */ +#define STK_MASK_POP_USED 0x00ff +#define IS_TO_VOID_TARGET(stk) \ + (((stk)->type & STK_MASK_POP_USED) || (stk)->type == STK_NULL_CHECK_START) + +typedef struct { + void* stack_p; + int stack_n; + OnigOptionType options; + OnigRegion* region; + const UChar* start; /* search start position (for \G: BEGIN_POSITION) */ +} MatchArg; + +#define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\ + (msa).stack_p = (void* )0;\ + (msa).options = (arg_option);\ + (msa).region = (arg_region);\ + (msa).start = (arg_start);\ +} while (0) + +#define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p) + + +#define STACK_INIT(alloc_addr, ptr_num, stack_num) do {\ + if (msa->stack_p) {\ + alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num));\ + stk_alloc = (StackType* )(msa->stack_p);\ + stk_base = stk_alloc;\ + stk = stk_base;\ + stk_end = stk_base + msa->stack_n;\ + }\ + else {\ + alloc_addr = (char* 
)xalloca(sizeof(char*) * (ptr_num)\ + + sizeof(StackType) * (stack_num));\ + stk_alloc = (StackType* )(alloc_addr + sizeof(char*) * (ptr_num));\ + stk_base = stk_alloc;\ + stk = stk_base;\ + stk_end = stk_base + (stack_num);\ + }\ +} while(0) + +#define STACK_SAVE do{\ + if (stk_base != stk_alloc) {\ + msa->stack_p = stk_base;\ + msa->stack_n = stk_end - stk_base;\ + };\ +} while(0) + +static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE; + +extern unsigned int +onig_get_match_stack_limit_size(void) +{ + return MatchStackLimitSize; +} + +extern int +onig_set_match_stack_limit_size(unsigned int size) +{ + MatchStackLimitSize = size; + return 0; +} + +static int +stack_double(StackType** arg_stk_base, StackType** arg_stk_end, + StackType** arg_stk, StackType* stk_alloc, MatchArg* msa) +{ + unsigned int n; + StackType *x, *stk_base, *stk_end, *stk; + + stk_base = *arg_stk_base; + stk_end = *arg_stk_end; + stk = *arg_stk; + + n = stk_end - stk_base; + if (stk_base == stk_alloc && IS_NULL(msa->stack_p)) { + x = (StackType* )xmalloc(sizeof(StackType) * n * 2); + if (IS_NULL(x)) { + STACK_SAVE; + return ONIGERR_MEMORY; + } + xmemcpy(x, stk_base, n * sizeof(StackType)); + n *= 2; + } + else { + n *= 2; + if (MatchStackLimitSize != 0 && n > MatchStackLimitSize) { + if ((unsigned int )(stk_end - stk_base) == MatchStackLimitSize) + return ONIGERR_MATCH_STACK_LIMIT_OVER; + else + n = MatchStackLimitSize; + } + x = (StackType* )xrealloc(stk_base, sizeof(StackType) * n); + if (IS_NULL(x)) { + STACK_SAVE; + return ONIGERR_MEMORY; + } + } + *arg_stk = x + (stk - stk_base); + *arg_stk_base = x; + *arg_stk_end = x + n; + return 0; +} + +#define STACK_ENSURE(n) do {\ + if (stk_end - stk < (n)) {\ + int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\ + if (r != 0) { STACK_SAVE; return r; } \ + }\ +} while(0) + +#define STACK_AT(index) (stk_base + (index)) +#define GET_STACK_INDEX(stk) ((stk) - stk_base) + +#define STACK_PUSH(stack_type,pat,s,sprev) do 
{\ + STACK_ENSURE(1);\ + stk->type = (stack_type);\ + stk->u.state.pcode = (pat);\ + stk->u.state.pstr = (s);\ + stk->u.state.pstr_prev = (sprev);\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_ENSURED(stack_type,pat) do {\ + stk->type = (stack_type);\ + stk->u.state.pcode = (pat);\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_TYPE(stack_type) do {\ + STACK_ENSURE(1);\ + stk->type = (stack_type);\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev) +#define STACK_PUSH_POS(s,sprev) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev) +#define STACK_PUSH_POS_NOT(pat,s,sprev) STACK_PUSH(STK_POS_NOT,pat,s,sprev) +#define STACK_PUSH_STOP_BT STACK_PUSH_TYPE(STK_STOP_BT) +#define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev) \ + STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev) + +#define STACK_PUSH_REPEAT(id, pat) do {\ + STACK_ENSURE(1);\ + stk->type = STK_REPEAT;\ + stk->u.repeat.num = (id);\ + stk->u.repeat.pcode = (pat);\ + stk->u.repeat.count = 0;\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_REPEAT_INC(sindex) do {\ + STACK_ENSURE(1);\ + stk->type = STK_REPEAT_INC;\ + stk->u.repeat_inc.si = (sindex);\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_MEM_START(mnum, s) do {\ + STACK_ENSURE(1);\ + stk->type = STK_MEM_START;\ + stk->u.mem.num = (mnum);\ + stk->u.mem.pstr = (s);\ + stk->u.mem.start = mem_start_stk[mnum];\ + stk->u.mem.end = mem_end_stk[mnum];\ + mem_start_stk[mnum] = GET_STACK_INDEX(stk);\ + mem_end_stk[mnum] = INVALID_STACK_INDEX;\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_MEM_END(mnum, s) do {\ + STACK_ENSURE(1);\ + stk->type = STK_MEM_END;\ + stk->u.mem.num = (mnum);\ + stk->u.mem.pstr = (s);\ + stk->u.mem.start = mem_start_stk[mnum];\ + stk->u.mem.end = mem_end_stk[mnum];\ + mem_end_stk[mnum] = GET_STACK_INDEX(stk);\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_MEM_END_MARK(mnum) do {\ + STACK_ENSURE(1);\ + stk->type = STK_MEM_END_MARK;\ + stk->u.mem.num = (mnum);\ + STACK_INC;\ +} while(0) + +#define 
STACK_GET_MEM_START(mnum, k) do {\ + int level = 0;\ + k = stk;\ + while (k > stk_base) {\ + k--;\ + if ((k->type == STK_MEM_END_MARK || k->type == STK_MEM_END) \ + && k->u.mem.num == (mnum)) {\ + level++;\ + }\ + else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\ + if (level == 0) break;\ + level--;\ + }\ + }\ +} while (0) + +#define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\ + int level = 0;\ + while (k < stk) {\ + if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\ + if (level == 0) (start) = k->u.mem.pstr;\ + level++;\ + }\ + else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\ + level--;\ + if (level == 0) {\ + (end) = k->u.mem.pstr;\ + break;\ + }\ + }\ + k++;\ + }\ +} while (0) + +#define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\ + STACK_ENSURE(1);\ + stk->type = STK_NULL_CHECK_START;\ + stk->u.null_check.num = (cnum);\ + stk->u.null_check.pstr = (s);\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_NULL_CHECK_END(cnum) do {\ + STACK_ENSURE(1);\ + stk->type = STK_NULL_CHECK_END;\ + stk->u.null_check.num = (cnum);\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_CALL_FRAME(pat) do {\ + STACK_ENSURE(1);\ + stk->type = STK_CALL_FRAME;\ + stk->u.call_frame.ret_addr = (pat);\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_RETURN do {\ + STACK_ENSURE(1);\ + stk->type = STK_RETURN;\ + STACK_INC;\ +} while(0) + + +#ifdef ONIG_DEBUG +#define STACK_BASE_CHECK(p) \ + if ((p) < stk_base) goto stack_error; +#else +#define STACK_BASE_CHECK(p) +#endif + +#define STACK_POP_ONE do {\ + stk--;\ + STACK_BASE_CHECK(stk); \ +} while(0) + +#define STACK_POP do {\ + switch (pop_level) {\ + case STACK_POP_LEVEL_FREE:\ + while (1) {\ + stk--;\ + STACK_BASE_CHECK(stk); \ + if ((stk->type & STK_MASK_POP_USED) != 0) break;\ + }\ + break;\ + case STACK_POP_LEVEL_MEM_START:\ + while (1) {\ + stk--;\ + STACK_BASE_CHECK(stk); \ + if ((stk->type & STK_MASK_POP_USED) != 0) break;\ + else if (stk->type == STK_MEM_START) {\ + mem_start_stk[stk->u.mem.num] = 
stk->u.mem.start;\ + mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ + }\ + }\ + break;\ + default:\ + while (1) {\ + stk--;\ + STACK_BASE_CHECK(stk); \ + if ((stk->type & STK_MASK_POP_USED) != 0) break;\ + else if (stk->type == STK_MEM_START) {\ + mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ + mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ + }\ + else if (stk->type == STK_REPEAT_INC) {\ + STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ + }\ + else if (stk->type == STK_MEM_END) {\ + mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ + mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ + }\ + }\ + break;\ + }\ +} while(0) + +#define STACK_POP_TIL_POS_NOT do {\ + while (1) {\ + stk--;\ + STACK_BASE_CHECK(stk); \ + if (stk->type == STK_POS_NOT) break;\ + else if (stk->type == STK_MEM_START) {\ + mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ + mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ + }\ + else if (stk->type == STK_REPEAT_INC) {\ + STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ + }\ + else if (stk->type == STK_MEM_END) {\ + mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ + mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ + }\ + }\ +} while(0) + +#define STACK_POP_TIL_LOOK_BEHIND_NOT do {\ + while (1) {\ + stk--;\ + STACK_BASE_CHECK(stk); \ + if (stk->type == STK_LOOK_BEHIND_NOT) break;\ + else if (stk->type == STK_MEM_START) {\ + mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ + mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ + }\ + else if (stk->type == STK_REPEAT_INC) {\ + STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ + }\ + else if (stk->type == STK_MEM_END) {\ + mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ + mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ + }\ + }\ +} while(0) + +#define STACK_POS_END(k) do {\ + k = stk;\ + while (1) {\ + k--;\ + STACK_BASE_CHECK(k); \ + if (IS_TO_VOID_TARGET(k)) {\ + k->type = STK_VOID;\ + }\ + else if (k->type == STK_POS) {\ + k->type = STK_VOID;\ + break;\ + }\ + }\ +} while(0) + +#define 
STACK_STOP_BT_END do {\ + StackType *k = stk;\ + while (1) {\ + k--;\ + STACK_BASE_CHECK(k); \ + if (IS_TO_VOID_TARGET(k)) {\ + k->type = STK_VOID;\ + }\ + else if (k->type == STK_STOP_BT) {\ + k->type = STK_VOID;\ + break;\ + }\ + }\ +} while(0) + +#define STACK_NULL_CHECK(isnull,id,s) do {\ + StackType* k = stk;\ + while (1) {\ + k--;\ + STACK_BASE_CHECK(k); \ + if (k->type == STK_NULL_CHECK_START) {\ + if (k->u.null_check.num == (id)) {\ + (isnull) = (k->u.null_check.pstr == (s));\ + break;\ + }\ + }\ + }\ +} while(0) + +#define STACK_NULL_CHECK_REC(isnull,id,s) do {\ + int level = 0;\ + StackType* k = stk;\ + while (1) {\ + k--;\ + STACK_BASE_CHECK(k); \ + if (k->type == STK_NULL_CHECK_START) {\ + if (k->u.null_check.num == (id)) {\ + if (level == 0) {\ + (isnull) = (k->u.null_check.pstr == (s));\ + break;\ + }\ + else level--;\ + }\ + }\ + else if (k->type == STK_NULL_CHECK_END) {\ + level++;\ + }\ + }\ +} while(0) + +#define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\ + StackType* k = stk;\ + while (1) {\ + k--;\ + STACK_BASE_CHECK(k); \ + if (k->type == STK_NULL_CHECK_START) {\ + if (k->u.null_check.num == (id)) {\ + if (k->u.null_check.pstr != (s)) {\ + (isnull) = 0;\ + break;\ + }\ + else {\ + UChar* endp;\ + (isnull) = 1;\ + while (k < stk) {\ + if (k->type == STK_MEM_START) {\ + if (k->u.mem.end == INVALID_STACK_INDEX) {\ + (isnull) = 0; break;\ + }\ + if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\ + endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\ + else\ + endp = (UChar* )k->u.mem.end;\ + if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\ + (isnull) = 0; break;\ + }\ + else if (endp != s) {\ + (isnull) = -1; /* empty, but position changed */ \ + }\ + }\ + k++;\ + }\ + break;\ + }\ + }\ + }\ + }\ +} while(0) + +#define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\ + int level = 0;\ + StackType* k = stk;\ + while (1) {\ + k--;\ + STACK_BASE_CHECK(k); \ + if (k->type == STK_NULL_CHECK_START) {\ + if (k->u.null_check.num == (id)) {\ + if (level == 
0) {\ + if (k->u.null_check.pstr != (s)) {\ + (isnull) = 0;\ + break;\ + }\ + else {\ + UChar* endp;\ + (isnull) = 1;\ + while (k < stk) {\ + if (k->type == STK_MEM_START) {\ + if (k->u.mem.end == INVALID_STACK_INDEX) {\ + (isnull) = 0; break;\ + }\ + if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\ + endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\ + else\ + endp = (UChar* )k->u.mem.end;\ + if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\ + (isnull) = 0; break;\ + }\ + else if (endp != s) {\ + (isnull) = -1; /* empty, but position changed */ \ + }\ + }\ + k++;\ + }\ + break;\ + }\ + }\ + else {\ + level--;\ + }\ + }\ + }\ + else if (k->type == STK_NULL_CHECK_END) {\ + if (k->u.null_check.num == (id)) level++;\ + }\ + }\ +} while(0) + +#define STACK_GET_REPEAT(id, k) do {\ + int level = 0;\ + k = stk;\ + while (1) {\ + k--;\ + STACK_BASE_CHECK(k); \ + if (k->type == STK_REPEAT) {\ + if (level == 0) {\ + if (k->u.repeat.num == (id)) {\ + break;\ + }\ + }\ + }\ + else if (k->type == STK_CALL_FRAME) level--;\ + else if (k->type == STK_RETURN) level++;\ + }\ +} while (0) + +#define STACK_RETURN(addr) do {\ + int level = 0;\ + StackType* k = stk;\ + while (1) {\ + k--;\ + STACK_BASE_CHECK(k); \ + if (k->type == STK_CALL_FRAME) {\ + if (level == 0) {\ + (addr) = k->u.call_frame.ret_addr;\ + break;\ + }\ + else level--;\ + }\ + else if (k->type == STK_RETURN)\ + level++;\ + }\ +} while(0) + + +#define STRING_CMP(s1,s2,len) do {\ + while (len-- > 0) {\ + if (*s1++ != *s2++) goto fail;\ + }\ +} while(0) + +#define STRING_CMP_IC(ambig_flag,s1,ps2,len) do {\ + if (string_cmp_ic(encode, ambig_flag, s1, ps2, len) == 0) \ + goto fail; \ +} while(0) + +static int string_cmp_ic(OnigEncoding enc, int ambig_flag, + UChar* s1, UChar** ps2, int mblen) +{ + UChar buf1[ONIGENC_MBC_NORMALIZE_MAXLEN]; + UChar buf2[ONIGENC_MBC_NORMALIZE_MAXLEN]; + UChar *p1, *p2, *end, *s2, *end2; + int len1, len2; + + s2 = *ps2; + end = s1 + mblen; + end2 = s2 + mblen; + while (s1 < end) { + len1 = 
ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &s1, end, buf1); + len2 = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &s2, end2, buf2); + if (len1 != len2) return 0; + p1 = buf1; + p2 = buf2; + while (len1-- > 0) { + if (*p1 != *p2) return 0; + p1++; + p2++; + } + } + + *ps2 = s2; + return 1; +} + +#define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\ + is_fail = 0;\ + while (len-- > 0) {\ + if (*s1++ != *s2++) {\ + is_fail = 1; break;\ + }\ + }\ +} while(0) + +#define STRING_CMP_VALUE_IC(ambig_flag,s1,ps2,len,is_fail) do {\ + if (string_cmp_ic(encode, ambig_flag, s1, ps2, len) == 0) \ + is_fail = 1; \ + else \ + is_fail = 0; \ +} while(0) + +#define ON_STR_BEGIN(s) ((s) == str) +#define ON_STR_END(s) ((s) == end) +#define IS_EMPTY_STR (str == end) + +#define DATA_ENSURE(n) \ + if (s + (n) > end) goto fail + +#define DATA_ENSURE_CHECK(n) (s + (n) <= end) + +#ifdef USE_CAPTURE_HISTORY +static int +make_capture_history_tree(OnigCaptureTreeNode* node, StackType** kp, + StackType* stk_top, UChar* str, regex_t* reg) +{ + int n, r; + OnigCaptureTreeNode* child; + StackType* k = *kp; + + while (k < stk_top) { + if (k->type == STK_MEM_START) { + n = k->u.mem.num; + if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP && + BIT_STATUS_AT(reg->capture_history, n) != 0) { + child = history_node_new(); + CHECK_NULL_RETURN_VAL(child, ONIGERR_MEMORY); + child->group = n; + child->beg = (int )(k->u.mem.pstr - str); + r = history_tree_add_child(node, child); + if (r != 0) return r; + *kp = (k + 1); + r = make_capture_history_tree(child, kp, stk_top, str, reg); + if (r != 0) return r; + + k = *kp; + child->end = (int )(k->u.mem.pstr - str); + } + } + else if (k->type == STK_MEM_END) { + if (k->u.mem.num == node->group) { + node->end = (int )(k->u.mem.pstr - str); + *kp = k; + return 0; + } + } + k++; + } + + return 1; /* 1: root node ending. 
*/ +} +#endif + +#ifdef RUBY_PLATFORM + +typedef struct { + int state; + regex_t* reg; + MatchArg* msa; + StackType* stk_base; +} TrapEnsureArg; + +static VALUE +trap_ensure(VALUE arg) +{ + TrapEnsureArg* ta = (TrapEnsureArg* )arg; + + if (ta->state == 0) { /* trap_exec() is not normal return */ + ONIG_STATE_DEC(ta->reg); + if (! IS_NULL(ta->msa->stack_p) && ta->stk_base != ta->msa->stack_p) + xfree(ta->stk_base); + + MATCH_ARG_FREE(*(ta->msa)); + } + + return Qnil; +} + +static VALUE +trap_exec(VALUE arg) +{ + TrapEnsureArg* ta; + + rb_trap_exec(); + + ta = (TrapEnsureArg* )arg; + ta->state = 1; /* normal return */ + return Qnil; +} + +extern void +onig_exec_trap(regex_t* reg, MatchArg* msa, StackType* stk_base) +{ + VALUE arg; + TrapEnsureArg ta; + + ta.state = 0; + ta.reg = reg; + ta.msa = msa; + ta.stk_base = stk_base; + arg = (VALUE )(&ta); + rb_ensure(trap_exec, arg, trap_ensure, arg); +} + +#define CHECK_INTERRUPT_IN_MATCH_AT do {\ + if (rb_trap_pending) {\ + if (! rb_prohibit_interrupt) {\ + onig_exec_trap(reg, msa, stk_base);\ + }\ + }\ +} while (0) +#else +#define CHECK_INTERRUPT_IN_MATCH_AT +#endif /* RUBY_PLATFORM */ + +#ifdef ONIG_DEBUG_STATISTICS + +#define USE_TIMEOFDAY + +#ifdef USE_TIMEOFDAY +#ifdef HAVE_SYS_TIME_H +#include <sys/time.h> +#endif +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +static struct timeval ts, te; +#define GETTIME(t) gettimeofday(&(t), (struct timezone* )0) +#define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \ + (((te).tv_sec - (ts).tv_sec)*1000000)) +#else +#ifdef HAVE_SYS_TIMES_H +#include <sys/times.h> +#endif +static struct tms ts, te; +#define GETTIME(t) times(&(t)) +#define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime) +#endif + +static int OpCounter[256]; +static int OpPrevCounter[256]; +static unsigned long OpTime[256]; +static int OpCurr = OP_FINISH; +static int OpPrevTarget = OP_FAIL; +static int MaxStackDepth = 0; + +#define STAT_OP_IN(opcode) do {\ + if (opcode == OpPrevTarget) 
OpPrevCounter[OpCurr]++;\ + OpCurr = opcode;\ + OpCounter[opcode]++;\ + GETTIME(ts);\ +} while (0) + +#define STAT_OP_OUT do {\ + GETTIME(te);\ + OpTime[OpCurr] += TIMEDIFF(te, ts);\ +} while (0) + +#ifdef RUBY_PLATFORM + +/* + * :nodoc: + */ +static VALUE onig_stat_print() +{ + onig_print_statistics(stderr); + return Qnil; +} +#endif + +extern void onig_statistics_init() +{ + int i; + for (i = 0; i < 256; i++) { + OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0; + } + MaxStackDepth = 0; + +#ifdef RUBY_PLATFORM + rb_define_global_function("onig_stat_print", onig_stat_print, 0); +#endif +} + +extern void +onig_print_statistics(FILE* f) +{ + int i; + fprintf(f, " count prev time\n"); + for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { + fprintf(f, "%8d: %8d: %10ld: %s\n", + OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name); + } + fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth); +} + +#define STACK_INC do {\ + stk++;\ + if (stk - stk_base > MaxStackDepth) \ + MaxStackDepth = stk - stk_base;\ +} while (0) + +#else +#define STACK_INC stk++ + +#define STAT_OP_IN(opcode) +#define STAT_OP_OUT +#endif + +extern int +onig_is_in_code_range(const UChar* p, OnigCodePoint code) +{ + OnigCodePoint n, *data; + OnigCodePoint low, high, x; + + GET_CODE_POINT(n, p); + data = (OnigCodePoint* )p; + data++; + + for (low = 0, high = n; low < high; ) { + x = (low + high) >> 1; + if (code > data[x * 2 + 1]) + low = x + 1; + else + high = x; + } + + return ((low < n && code >= data[low * 2]) ? 1 : 0); +} + +static int +code_is_in_cclass_node(void* node, OnigCodePoint code, int enclen) +{ + unsigned int in_cc; + CClassNode* cc = (CClassNode* )node; + + if (enclen == 1) { + in_cc = BITSET_AT(cc->bs, code); + } + else { + UChar* p = ((BBuf* )(cc->mbuf))->p; + in_cc = onig_is_in_code_range(p, code); + } + + if (IS_CCLASS_NOT(cc)) { + return (in_cc ? 0 : 1); + } + else { + return (in_cc ? 
1 : 0); + } +} + +/* matching region of POSIX API */ +typedef int regoff_t; + +typedef struct { + regoff_t rm_so; + regoff_t rm_eo; +} posix_regmatch_t; + +/* match data(str - end) from position (sstart). */ +/* if sstart == str then set sprev to NULL. */ +static int +match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, + UChar* sprev, MatchArg* msa) +{ + static UChar FinishCode[] = { OP_FINISH }; + + int i, n, num_mem, best_len, pop_level; + LengthType tlen, tlen2; + MemNumType mem; + RelAddrType addr; + OnigOptionType option = reg->options; + OnigEncoding encode = reg->enc; + OnigAmbigType ambig_flag = reg->ambig_flag; + UChar *s, *q, *sbegin; + UChar *p = reg->p; + char *alloca_base; + StackType *stk_alloc, *stk_base, *stk, *stk_end; + StackType *stkp; /* used as any purpose. */ + StackIndex si; + StackIndex *repeat_stk; + StackIndex *mem_start_stk, *mem_end_stk; + n = reg->num_repeat + reg->num_mem * 2; + + STACK_INIT(alloca_base, n, INIT_MATCH_STACK_SIZE); + pop_level = reg->stack_pop_level; + num_mem = reg->num_mem; + repeat_stk = (StackIndex* )alloca_base; + + mem_start_stk = (StackIndex* )(repeat_stk + reg->num_repeat); + mem_end_stk = mem_start_stk + num_mem; + mem_start_stk--; /* for index start from 1, + mem_start_stk[1]..mem_start_stk[num_mem] */ + mem_end_stk--; /* for index start from 1, + mem_end_stk[1]..mem_end_stk[num_mem] */ + for (i = 1; i <= num_mem; i++) { + mem_start_stk[i] = mem_end_stk[i] = INVALID_STACK_INDEX; + } + +#ifdef ONIG_DEBUG_MATCH + fprintf(stderr, "match_at: str: %d, end: %d, start: %d, sprev: %d\n", + (int )str, (int )end, (int )sstart, (int )sprev); + fprintf(stderr, "size: %d, start offset: %d\n", + (int )(end - str), (int )(sstart - str)); +#endif + + STACK_PUSH_ENSURED(STK_ALT, FinishCode); /* bottom stack */ + best_len = ONIG_MISMATCH; + s = (UChar* )sstart; + while (1) { +#ifdef ONIG_DEBUG_MATCH + { + UChar *q, *bp, buf[50]; + int len; + fprintf(stderr, "%4d> \"", (int )(s - str)); + bp = buf; + 
for (i = 0, q = s; i < 7 && q < end; i++) { + len = enc_len(encode, q); + while (len-- > 0) *bp++ = *q++; + } + if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; } + else { xmemcpy(bp, "\"", 1); bp += 1; } + *bp = 0; + fputs(buf, stderr); + for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr); + onig_print_compiled_byte_code(stderr, p, NULL, encode); + fprintf(stderr, "\n"); + } +#endif + + sbegin = s; + switch (*p++) { + case OP_END: STAT_OP_IN(OP_END); + n = s - sstart; + if (n > best_len) { + OnigRegion* region = msa->region; + best_len = n; + if (region) { +#ifdef USE_POSIX_REGION_OPTION + if (IS_POSIX_REGION(msa->options)) { + posix_regmatch_t* rmt = (posix_regmatch_t* )region; + + rmt[0].rm_so = sstart - str; + rmt[0].rm_eo = s - str; + for (i = 1; i <= num_mem; i++) { + if (mem_end_stk[i] != INVALID_STACK_INDEX) { + if (BIT_STATUS_AT(reg->bt_mem_start, i)) + rmt[i].rm_so = STACK_AT(mem_start_stk[i])->u.mem.pstr - str; + else + rmt[i].rm_so = (UChar* )((void* )(mem_start_stk[i])) - str; + + rmt[i].rm_eo = (BIT_STATUS_AT(reg->bt_mem_end, i) + ? STACK_AT(mem_end_stk[i])->u.mem.pstr + : (UChar* )((void* )mem_end_stk[i])) - str; + } + else { + rmt[i].rm_so = rmt[i].rm_eo = ONIG_REGION_NOTPOS; + } + } + } + else { +#endif /* USE_POSIX_REGION_OPTION */ + region->beg[0] = sstart - str; + region->end[0] = s - str; + for (i = 1; i <= num_mem; i++) { + if (mem_end_stk[i] != INVALID_STACK_INDEX) { + if (BIT_STATUS_AT(reg->bt_mem_start, i)) + region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str; + else + region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str; + + region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i) + ? 
STACK_AT(mem_end_stk[i])->u.mem.pstr + : (UChar* )((void* )mem_end_stk[i])) - str; + } + else { + region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS; + } + } + +#ifdef USE_CAPTURE_HISTORY + if (reg->capture_history != 0) { + int r; + OnigCaptureTreeNode* node; + + if (IS_NULL(region->history_root)) { + region->history_root = node = history_node_new(); + CHECK_NULL_RETURN_VAL(node, ONIGERR_MEMORY); + } + else { + node = region->history_root; + history_tree_clear(node); + } + + node->group = 0; + node->beg = sstart - str; + node->end = s - str; + + stkp = stk_base; + r = make_capture_history_tree(region->history_root, &stkp, + stk, (UChar* )str, reg); + if (r < 0) { + best_len = r; /* error code */ + goto finish; + } + } +#endif /* USE_CAPTURE_HISTORY */ +#ifdef USE_POSIX_REGION_OPTION + } /* else IS_POSIX_REGION() */ +#endif + } /* if (region) */ + } /* n > best_len */ + STAT_OP_OUT; + + if (IS_FIND_CONDITION(option)) { + if (IS_FIND_NOT_EMPTY(option) && s == sstart) { + best_len = ONIG_MISMATCH; + goto fail; /* for retry */ + } + if (IS_FIND_LONGEST(option) && s < end) { + goto fail; /* for retry */ + } + } + + /* default behavior: return first-matching result. 
*/ + goto finish; + break; + + case OP_EXACT1: STAT_OP_IN(OP_EXACT1); +#if 0 + DATA_ENSURE(1); + if (*p != *s) goto fail; + p++; s++; +#endif + if (*p != *s++) goto fail; + DATA_ENSURE(0); + p++; + STAT_OP_OUT; + break; + + case OP_EXACT1_IC: STAT_OP_IN(OP_EXACT1_IC); + { + int len; + UChar *q, *ss, *sp, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN]; + + DATA_ENSURE(1); + ss = s; + sp = p; + + exact1_ic_retry: + len = ONIGENC_MBC_TO_NORMALIZE(encode, ambig_flag, &s, end, lowbuf); + DATA_ENSURE(0); + q = lowbuf; + while (len-- > 0) { + if (*p != *q) { +#if 1 + if ((ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { + ambig_flag &= ~ONIGENC_AMBIGUOUS_MATCH_COMPOUND; + s = ss; + p = sp; + goto exact1_ic_retry; + } + else + goto fail; +#else + goto fail; +#endif + } + p++; q++; + } + } + STAT_OP_OUT; + break; + + case OP_EXACT2: STAT_OP_IN(OP_EXACT2); + DATA_ENSURE(2); + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + sprev = s; + p++; s++; + STAT_OP_OUT; + continue; + break; + + case OP_EXACT3: STAT_OP_IN(OP_EXACT3); + DATA_ENSURE(3); + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + sprev = s; + p++; s++; + STAT_OP_OUT; + continue; + break; + + case OP_EXACT4: STAT_OP_IN(OP_EXACT4); + DATA_ENSURE(4); + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + sprev = s; + p++; s++; + STAT_OP_OUT; + continue; + break; + + case OP_EXACT5: STAT_OP_IN(OP_EXACT5); + DATA_ENSURE(5); + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + sprev = s; + p++; s++; + STAT_OP_OUT; + continue; + break; + + case OP_EXACTN: STAT_OP_IN(OP_EXACTN); + GET_LENGTH_INC(tlen, p); + DATA_ENSURE(tlen); + while (tlen-- > 0) { + if (*p++ != *s++) goto fail; + } + sprev = s - 1; + STAT_OP_OUT; + continue; + break; 
+ + case OP_EXACTN_IC: STAT_OP_IN(OP_EXACTN_IC); + { + int len; + UChar *ss, *sp, *q, *endp, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN]; + + GET_LENGTH_INC(tlen, p); + endp = p + tlen; + + while (p < endp) { + sprev = s; + DATA_ENSURE(1); + ss = s; + sp = p; + + exactn_ic_retry: + len = ONIGENC_MBC_TO_NORMALIZE(encode, ambig_flag, &s, end, lowbuf); + DATA_ENSURE(0); + q = lowbuf; + while (len-- > 0) { + if (*p != *q) { +#if 1 + if ((ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { + ambig_flag &= ~ONIGENC_AMBIGUOUS_MATCH_COMPOUND; + s = ss; + p = sp; + goto exactn_ic_retry; + } + else + goto fail; +#else + goto fail; +#endif + } + p++; q++; + } + } + } + + STAT_OP_OUT; + continue; + break; + + case OP_EXACTMB2N1: STAT_OP_IN(OP_EXACTMB2N1); + DATA_ENSURE(2); + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + STAT_OP_OUT; + break; + + case OP_EXACTMB2N2: STAT_OP_IN(OP_EXACTMB2N2); + DATA_ENSURE(4); + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + sprev = s; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + STAT_OP_OUT; + continue; + break; + + case OP_EXACTMB2N3: STAT_OP_IN(OP_EXACTMB2N3); + DATA_ENSURE(6); + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + sprev = s; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + STAT_OP_OUT; + continue; + break; + + case OP_EXACTMB2N: STAT_OP_IN(OP_EXACTMB2N); + GET_LENGTH_INC(tlen, p); + DATA_ENSURE(tlen * 2); + while (tlen-- > 0) { + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + } + sprev = s - 2; + STAT_OP_OUT; + continue; + break; + + case OP_EXACTMB3N: STAT_OP_IN(OP_EXACTMB3N); + GET_LENGTH_INC(tlen, p); + DATA_ENSURE(tlen * 3); + while (tlen-- > 0) { + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + } 
+ sprev = s - 3; + STAT_OP_OUT; + continue; + break; + + case OP_EXACTMBN: STAT_OP_IN(OP_EXACTMBN); + GET_LENGTH_INC(tlen, p); /* mb-len */ + GET_LENGTH_INC(tlen2, p); /* string len */ + tlen2 *= tlen; + DATA_ENSURE(tlen2); + while (tlen2-- > 0) { + if (*p != *s) goto fail; + p++; s++; + } + sprev = s - tlen; + STAT_OP_OUT; + continue; + break; + + case OP_CCLASS: STAT_OP_IN(OP_CCLASS); + DATA_ENSURE(1); + if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail; + p += SIZE_BITSET; + s += enc_len(encode, s); /* OP_CCLASS can match mb-code. \D, \S */ + STAT_OP_OUT; + break; + + case OP_CCLASS_MB: STAT_OP_IN(OP_CCLASS_MB); + if (! ONIGENC_IS_MBC_HEAD(encode, s)) goto fail; + + cclass_mb: + GET_LENGTH_INC(tlen, p); + { + OnigCodePoint code; + UChar *ss; + int mb_len; + + DATA_ENSURE(1); + mb_len = enc_len(encode, s); + DATA_ENSURE(mb_len); + ss = s; + s += mb_len; + code = ONIGENC_MBC_TO_CODE(encode, ss, s); + +#ifdef PLATFORM_UNALIGNED_WORD_ACCESS + if (! onig_is_in_code_range(p, code)) goto fail; +#else + q = p; + ALIGNMENT_RIGHT(q); + if (! onig_is_in_code_range(q, code)) goto fail; +#endif + } + p += tlen; + STAT_OP_OUT; + break; + + case OP_CCLASS_MIX: STAT_OP_IN(OP_CCLASS_MIX); + DATA_ENSURE(1); + if (ONIGENC_IS_MBC_HEAD(encode, s)) { + p += SIZE_BITSET; + goto cclass_mb; + } + else { + if (BITSET_AT(((BitSetRef )p), *s) == 0) + goto fail; + + p += SIZE_BITSET; + GET_LENGTH_INC(tlen, p); + p += tlen; + s++; + } + STAT_OP_OUT; + break; + + case OP_CCLASS_NOT: STAT_OP_IN(OP_CCLASS_NOT); + DATA_ENSURE(1); + if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail; + p += SIZE_BITSET; + s += enc_len(encode, s); + STAT_OP_OUT; + break; + + case OP_CCLASS_MB_NOT: STAT_OP_IN(OP_CCLASS_MB_NOT); + DATA_ENSURE(1); + if (! 
ONIGENC_IS_MBC_HEAD(encode, s)) { + s++; + GET_LENGTH_INC(tlen, p); + p += tlen; + goto cc_mb_not_success; + } + + cclass_mb_not: + GET_LENGTH_INC(tlen, p); + { + OnigCodePoint code; + UChar *ss; + int mb_len = enc_len(encode, s); + + if (s + mb_len > end) { + DATA_ENSURE(1); + s = (UChar* )end; + p += tlen; + goto cc_mb_not_success; + } + + ss = s; + s += mb_len; + code = ONIGENC_MBC_TO_CODE(encode, ss, s); + +#ifdef PLATFORM_UNALIGNED_WORD_ACCESS + if (onig_is_in_code_range(p, code)) goto fail; +#else + q = p; + ALIGNMENT_RIGHT(q); + if (onig_is_in_code_range(q, code)) goto fail; +#endif + } + p += tlen; + + cc_mb_not_success: + STAT_OP_OUT; + break; + + case OP_CCLASS_MIX_NOT: STAT_OP_IN(OP_CCLASS_MIX_NOT); + DATA_ENSURE(1); + if (ONIGENC_IS_MBC_HEAD(encode, s)) { + p += SIZE_BITSET; + goto cclass_mb_not; + } + else { + if (BITSET_AT(((BitSetRef )p), *s) != 0) + goto fail; + + p += SIZE_BITSET; + GET_LENGTH_INC(tlen, p); + p += tlen; + s++; + } + STAT_OP_OUT; + break; + + case OP_CCLASS_NODE: STAT_OP_IN(OP_CCLASS_NODE); + { + OnigCodePoint code; + void *node; + int mb_len; + UChar *ss; + + DATA_ENSURE(1); + GET_POINTER_INC(node, p); + mb_len = enc_len(encode, s); + ss = s; + s += mb_len; + code = ONIGENC_MBC_TO_CODE(encode, ss, s); + if (code_is_in_cclass_node(node, code, mb_len) == 0) goto fail; + } + STAT_OP_OUT; + break; + + case OP_ANYCHAR: STAT_OP_IN(OP_ANYCHAR); + DATA_ENSURE(1); + n = enc_len(encode, s); + DATA_ENSURE(n); + if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; + s += n; + STAT_OP_OUT; + break; + + case OP_ANYCHAR_ML: STAT_OP_IN(OP_ANYCHAR_ML); + DATA_ENSURE(1); + n = enc_len(encode, s); + DATA_ENSURE(n); + s += n; + STAT_OP_OUT; + break; + + case OP_ANYCHAR_STAR: STAT_OP_IN(OP_ANYCHAR_STAR); + while (s < end) { + STACK_PUSH_ALT(p, s, sprev); + n = enc_len(encode, s); + DATA_ENSURE(n); + if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; + sprev = s; + s += n; + } + STAT_OP_OUT; + break; + + case OP_ANYCHAR_ML_STAR: 
STAT_OP_IN(OP_ANYCHAR_ML_STAR); + while (s < end) { + STACK_PUSH_ALT(p, s, sprev); + n = enc_len(encode, s); + if (n > 1) { + DATA_ENSURE(n); + sprev = s; + s += n; + } + else { + sprev = s; + s++; + } + } + STAT_OP_OUT; + break; + + case OP_ANYCHAR_STAR_PEEK_NEXT: STAT_OP_IN(OP_ANYCHAR_STAR_PEEK_NEXT); + while (s < end) { + if (*p == *s) { + STACK_PUSH_ALT(p + 1, s, sprev); + } + n = enc_len(encode, s); + DATA_ENSURE(n); + if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; + sprev = s; + s += n; + } + p++; + STAT_OP_OUT; + break; + + case OP_ANYCHAR_ML_STAR_PEEK_NEXT:STAT_OP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT); + while (s < end) { + if (*p == *s) { + STACK_PUSH_ALT(p + 1, s, sprev); + } + n = enc_len(encode, s); + if (n >1) { + DATA_ENSURE(n); + sprev = s; + s += n; + } + else { + sprev = s; + s++; + } + } + p++; + STAT_OP_OUT; + break; + + case OP_WORD: STAT_OP_IN(OP_WORD); + DATA_ENSURE(1); + if (! ONIGENC_IS_MBC_WORD(encode, s, end)) + goto fail; + + s += enc_len(encode, s); + STAT_OP_OUT; + break; + + case OP_NOT_WORD: STAT_OP_IN(OP_NOT_WORD); + DATA_ENSURE(1); + if (ONIGENC_IS_MBC_WORD(encode, s, end)) + goto fail; + + s += enc_len(encode, s); + STAT_OP_OUT; + break; + + case OP_WORD_BOUND: STAT_OP_IN(OP_WORD_BOUND); + if (ON_STR_BEGIN(s)) { + DATA_ENSURE(1); + if (! ONIGENC_IS_MBC_WORD(encode, s, end)) + goto fail; + } + else if (ON_STR_END(s)) { + if (! 
ONIGENC_IS_MBC_WORD(encode, sprev, end)) + goto fail; + } + else { + if (ONIGENC_IS_MBC_WORD(encode, s, end) + == ONIGENC_IS_MBC_WORD(encode, sprev, end)) + goto fail; + } + STAT_OP_OUT; + continue; + break; + + case OP_NOT_WORD_BOUND: STAT_OP_IN(OP_NOT_WORD_BOUND); + if (ON_STR_BEGIN(s)) { + if (DATA_ENSURE_CHECK(1) && ONIGENC_IS_MBC_WORD(encode, s, end)) + goto fail; + } + else if (ON_STR_END(s)) { + if (ONIGENC_IS_MBC_WORD(encode, sprev, end)) + goto fail; + } + else { + if (ONIGENC_IS_MBC_WORD(encode, s, end) + != ONIGENC_IS_MBC_WORD(encode, sprev, end)) + goto fail; + } + STAT_OP_OUT; + continue; + break; + +#ifdef USE_WORD_BEGIN_END + case OP_WORD_BEGIN: STAT_OP_IN(OP_WORD_BEGIN); + if (DATA_ENSURE_CHECK(1) && ONIGENC_IS_MBC_WORD(encode, s, end)) { + if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) { + STAT_OP_OUT; + continue; + } + } + goto fail; + break; + + case OP_WORD_END: STAT_OP_IN(OP_WORD_END); + if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) { + if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) { + STAT_OP_OUT; + continue; + } + } + goto fail; + break; +#endif + + case OP_BEGIN_BUF: STAT_OP_IN(OP_BEGIN_BUF); + if (! ON_STR_BEGIN(s)) goto fail; + + STAT_OP_OUT; + continue; + break; + + case OP_END_BUF: STAT_OP_IN(OP_END_BUF); + if (! 
ON_STR_END(s)) goto fail; + + STAT_OP_OUT; + continue; + break; + + case OP_BEGIN_LINE: STAT_OP_IN(OP_BEGIN_LINE); + if (ON_STR_BEGIN(s)) { + if (IS_NOTBOL(msa->options)) goto fail; + STAT_OP_OUT; + continue; + } + else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) { + STAT_OP_OUT; + continue; + } + goto fail; + break; + + case OP_END_LINE: STAT_OP_IN(OP_END_LINE); + if (ON_STR_END(s)) { +#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE + if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { +#endif + if (IS_NOTEOL(msa->options)) goto fail; + STAT_OP_OUT; + continue; +#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE + } +#endif + } + else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) { + STAT_OP_OUT; + continue; + } + goto fail; + break; + + case OP_SEMI_END_BUF: STAT_OP_IN(OP_SEMI_END_BUF); + if (ON_STR_END(s)) { +#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE + if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { +#endif + if (IS_NOTEOL(msa->options)) goto fail; /* Is it needed? 
*/ + STAT_OP_OUT; + continue; +#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE + } +#endif + } + else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) && + ON_STR_END(s + enc_len(encode, s))) { + STAT_OP_OUT; + continue; + } + goto fail; + break; + + case OP_BEGIN_POSITION: STAT_OP_IN(OP_BEGIN_POSITION); + if (s != msa->start) + goto fail; + + STAT_OP_OUT; + continue; + break; + + case OP_MEMORY_START_PUSH: STAT_OP_IN(OP_MEMORY_START_PUSH); + GET_MEMNUM_INC(mem, p); + STACK_PUSH_MEM_START(mem, s); + STAT_OP_OUT; + continue; + break; + + case OP_MEMORY_START: STAT_OP_IN(OP_MEMORY_START); + GET_MEMNUM_INC(mem, p); + mem_start_stk[mem] = (StackIndex )((void* )s); + STAT_OP_OUT; + continue; + break; + + case OP_MEMORY_END_PUSH: STAT_OP_IN(OP_MEMORY_END_PUSH); + GET_MEMNUM_INC(mem, p); + STACK_PUSH_MEM_END(mem, s); + STAT_OP_OUT; + continue; + break; + + case OP_MEMORY_END: STAT_OP_IN(OP_MEMORY_END); + GET_MEMNUM_INC(mem, p); + mem_end_stk[mem] = (StackIndex )((void* )s); + STAT_OP_OUT; + continue; + break; + +#ifdef USE_SUBEXP_CALL + case OP_MEMORY_END_PUSH_REC: STAT_OP_IN(OP_MEMORY_END_PUSH_REC); + GET_MEMNUM_INC(mem, p); + STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. 
*/ + STACK_PUSH_MEM_END(mem, s); + mem_start_stk[mem] = GET_STACK_INDEX(stkp); + STAT_OP_OUT; + continue; + break; + + case OP_MEMORY_END_REC: STAT_OP_IN(OP_MEMORY_END_REC); + GET_MEMNUM_INC(mem, p); + mem_end_stk[mem] = (StackIndex )((void* )s); + STACK_GET_MEM_START(mem, stkp); + + if (BIT_STATUS_AT(reg->bt_mem_start, mem)) + mem_start_stk[mem] = GET_STACK_INDEX(stkp); + else + mem_start_stk[mem] = (StackIndex )((void* )stkp->u.mem.pstr); + + STACK_PUSH_MEM_END_MARK(mem); + STAT_OP_OUT; + continue; + break; +#endif + + case OP_BACKREF1: STAT_OP_IN(OP_BACKREF1); + mem = 1; + goto backref; + break; + + case OP_BACKREF2: STAT_OP_IN(OP_BACKREF2); + mem = 2; + goto backref; + break; + + case OP_BACKREF3: STAT_OP_IN(OP_BACKREF3); + mem = 3; + goto backref; + break; + + case OP_BACKREFN: STAT_OP_IN(OP_BACKREFN); + GET_MEMNUM_INC(mem, p); + backref: + { + int len; + UChar *pstart, *pend; + + /* if you want to remove following line, + you should check in parse and compile time. */ + if (mem > num_mem) goto fail; + if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; + if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; + + if (BIT_STATUS_AT(reg->bt_mem_start, mem)) + pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; + else + pstart = (UChar* )((void* )mem_start_stk[mem]); + + pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) + ? STACK_AT(mem_end_stk[mem])->u.mem.pstr + : (UChar* )((void* )mem_end_stk[mem])); + n = pend - pstart; + DATA_ENSURE(n); + sprev = s; + STRING_CMP(pstart, s, n); + while (sprev + (len = enc_len(encode, sprev)) < s) + sprev += len; + + STAT_OP_OUT; + continue; + } + break; + + case OP_BACKREFN_IC: STAT_OP_IN(OP_BACKREFN_IC); + GET_MEMNUM_INC(mem, p); + { + int len; + UChar *pstart, *pend; + + /* if you want to remove following line, + you should check in parse and compile time. 
*/ + if (mem > num_mem) goto fail; + if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; + if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; + + if (BIT_STATUS_AT(reg->bt_mem_start, mem)) + pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; + else + pstart = (UChar* )((void* )mem_start_stk[mem]); + + pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) + ? STACK_AT(mem_end_stk[mem])->u.mem.pstr + : (UChar* )((void* )mem_end_stk[mem])); + n = pend - pstart; + DATA_ENSURE(n); + sprev = s; + STRING_CMP_IC(ambig_flag, pstart, &s, n); + while (sprev + (len = enc_len(encode, sprev)) < s) + sprev += len; + + STAT_OP_OUT; + continue; + } + break; + + case OP_BACKREF_MULTI: STAT_OP_IN(OP_BACKREF_MULTI); + { + int len, is_fail; + UChar *pstart, *pend, *swork; + + GET_LENGTH_INC(tlen, p); + for (i = 0; i < tlen; i++) { + GET_MEMNUM_INC(mem, p); + + if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; + if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; + + if (BIT_STATUS_AT(reg->bt_mem_start, mem)) + pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; + else + pstart = (UChar* )((void* )mem_start_stk[mem]); + + pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) + ? 
STACK_AT(mem_end_stk[mem])->u.mem.pstr + : (UChar* )((void* )mem_end_stk[mem])); + n = pend - pstart; + DATA_ENSURE(n); + sprev = s; + swork = s; + STRING_CMP_VALUE(pstart, swork, n, is_fail); + if (is_fail) continue; + s = swork; + while (sprev + (len = enc_len(encode, sprev)) < s) + sprev += len; + + p += (SIZE_MEMNUM * (tlen - i - 1)); + break; /* success */ + } + if (i == tlen) goto fail; + STAT_OP_OUT; + continue; + } + break; + + case OP_BACKREF_MULTI_IC: STAT_OP_IN(OP_BACKREF_MULTI_IC); + { + int len, is_fail; + UChar *pstart, *pend, *swork; + + GET_LENGTH_INC(tlen, p); + for (i = 0; i < tlen; i++) { + GET_MEMNUM_INC(mem, p); + + if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; + if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; + + if (BIT_STATUS_AT(reg->bt_mem_start, mem)) + pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; + else + pstart = (UChar* )((void* )mem_start_stk[mem]); + + pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) + ? STACK_AT(mem_end_stk[mem])->u.mem.pstr + : (UChar* )((void* )mem_end_stk[mem])); + n = pend - pstart; + DATA_ENSURE(n); + sprev = s; + swork = s; + STRING_CMP_VALUE_IC(ambig_flag, pstart, &swork, n, is_fail); + if (is_fail) continue; + s = swork; + while (sprev + (len = enc_len(encode, sprev)) < s) + sprev += len; + + p += (SIZE_MEMNUM * (tlen - i - 1)); + break; /* success */ + } + if (i == tlen) goto fail; + STAT_OP_OUT; + continue; + } + break; + + case OP_SET_OPTION_PUSH: STAT_OP_IN(OP_SET_OPTION_PUSH); + GET_OPTION_INC(option, p); + STACK_PUSH_ALT(p, s, sprev); + p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL; + STAT_OP_OUT; + continue; + break; + + case OP_SET_OPTION: STAT_OP_IN(OP_SET_OPTION); + GET_OPTION_INC(option, p); + STAT_OP_OUT; + continue; + break; + + case OP_NULL_CHECK_START: STAT_OP_IN(OP_NULL_CHECK_START); + GET_MEMNUM_INC(mem, p); /* mem: null check id */ + STACK_PUSH_NULL_CHECK_START(mem, s); + STAT_OP_OUT; + continue; + break; + + case OP_NULL_CHECK_END: STAT_OP_IN(OP_NULL_CHECK_END); + { + int 
isnull; + + GET_MEMNUM_INC(mem, p); /* mem: null check id */ + STACK_NULL_CHECK(isnull, mem, s); + if (isnull) { +#ifdef ONIG_DEBUG_MATCH + fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%d\n", + (int )mem, (int )s); +#endif + null_check_found: + /* empty loop founded, skip next instruction */ + switch (*p++) { + case OP_JUMP: + case OP_PUSH: + p += SIZE_RELADDR; + break; + case OP_REPEAT_INC: + case OP_REPEAT_INC_NG: + case OP_REPEAT_INC_SG: + case OP_REPEAT_INC_NG_SG: + p += SIZE_MEMNUM; + break; + default: + goto unexpected_bytecode_error; + break; + } + } + } + STAT_OP_OUT; + continue; + break; + +#ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK + case OP_NULL_CHECK_END_MEMST: STAT_OP_IN(OP_NULL_CHECK_END_MEMST); + { + int isnull; + + GET_MEMNUM_INC(mem, p); /* mem: null check id */ + STACK_NULL_CHECK_MEMST(isnull, mem, s, reg); + if (isnull) { +#ifdef ONIG_DEBUG_MATCH + fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%d\n", + (int )mem, (int )s); +#endif + if (isnull == -1) goto fail; + goto null_check_found; + } + } + STAT_OP_OUT; + continue; + break; +#endif + +#ifdef USE_SUBEXP_CALL + case OP_NULL_CHECK_END_MEMST_PUSH: + STAT_OP_IN(OP_NULL_CHECK_END_MEMST_PUSH); + { + int isnull; + + GET_MEMNUM_INC(mem, p); /* mem: null check id */ +#ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK + STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg); +#else + STACK_NULL_CHECK_REC(isnull, mem, s); +#endif + if (isnull) { +#ifdef ONIG_DEBUG_MATCH + fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%d\n", + (int )mem, (int )s); +#endif + if (isnull == -1) goto fail; + goto null_check_found; + } + else { + STACK_PUSH_NULL_CHECK_END(mem); + } + } + STAT_OP_OUT; + continue; + break; +#endif + + case OP_JUMP: STAT_OP_IN(OP_JUMP); + GET_RELADDR_INC(addr, p); + p += addr; + STAT_OP_OUT; + CHECK_INTERRUPT_IN_MATCH_AT; + continue; + break; + + case OP_PUSH: STAT_OP_IN(OP_PUSH); + GET_RELADDR_INC(addr, p); + STACK_PUSH_ALT(p + addr, s, sprev); + STAT_OP_OUT; + 
continue; + break; + + case OP_POP: STAT_OP_IN(OP_POP); + STACK_POP_ONE; + STAT_OP_OUT; + continue; + break; + + case OP_PUSH_OR_JUMP_EXACT1: STAT_OP_IN(OP_PUSH_OR_JUMP_EXACT1); + GET_RELADDR_INC(addr, p); + if (*p == *s && DATA_ENSURE_CHECK(1)) { + p++; + STACK_PUSH_ALT(p + addr, s, sprev); + STAT_OP_OUT; + continue; + } + p += (addr + 1); + STAT_OP_OUT; + continue; + break; + + case OP_PUSH_IF_PEEK_NEXT: STAT_OP_IN(OP_PUSH_IF_PEEK_NEXT); + GET_RELADDR_INC(addr, p); + if (*p == *s) { + p++; + STACK_PUSH_ALT(p + addr, s, sprev); + STAT_OP_OUT; + continue; + } + p++; + STAT_OP_OUT; + continue; + break; + + case OP_REPEAT: STAT_OP_IN(OP_REPEAT); + { + GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ + GET_RELADDR_INC(addr, p); + + STACK_ENSURE(1); + repeat_stk[mem] = GET_STACK_INDEX(stk); + STACK_PUSH_REPEAT(mem, p); + + if (reg->repeat_range[mem].lower == 0) { + STACK_PUSH_ALT(p + addr, s, sprev); + } + } + STAT_OP_OUT; + continue; + break; + + case OP_REPEAT_NG: STAT_OP_IN(OP_REPEAT_NG); + { + GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ + GET_RELADDR_INC(addr, p); + + STACK_ENSURE(1); + repeat_stk[mem] = GET_STACK_INDEX(stk); + STACK_PUSH_REPEAT(mem, p); + + if (reg->repeat_range[mem].lower == 0) { + STACK_PUSH_ALT(p, s, sprev); + p += addr; + } + } + STAT_OP_OUT; + continue; + break; + + case OP_REPEAT_INC: STAT_OP_IN(OP_REPEAT_INC); + GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ + si = repeat_stk[mem]; + stkp = STACK_AT(si); + + repeat_inc: + stkp->u.repeat.count++; + if (stkp->u.repeat.count == reg->repeat_range[mem].upper) { + /* end of repeat. Nothing to do. */ + } + else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) { + STACK_PUSH_ALT(p, s, sprev); + p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. 
*/ + } + else { + p = stkp->u.repeat.pcode; + } + STACK_PUSH_REPEAT_INC(si); + STAT_OP_OUT; + CHECK_INTERRUPT_IN_MATCH_AT; + continue; + break; + + case OP_REPEAT_INC_SG: STAT_OP_IN(OP_REPEAT_INC_SG); + GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ + STACK_GET_REPEAT(mem, stkp); + si = GET_STACK_INDEX(stkp); + goto repeat_inc; + break; + + case OP_REPEAT_INC_NG: STAT_OP_IN(OP_REPEAT_INC_NG); + GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ + si = repeat_stk[mem]; + stkp = STACK_AT(si); + + repeat_inc_ng: + stkp->u.repeat.count++; + if (stkp->u.repeat.count < reg->repeat_range[mem].upper || + IS_REPEAT_INFINITE(reg->repeat_range[mem].upper)) { + if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) { + UChar* pcode = stkp->u.repeat.pcode; + + STACK_PUSH_REPEAT_INC(si); + STACK_PUSH_ALT(pcode, s, sprev); + } + else { + p = stkp->u.repeat.pcode; + STACK_PUSH_REPEAT_INC(si); + } + } + else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) { + STACK_PUSH_REPEAT_INC(si); + } + STAT_OP_OUT; + CHECK_INTERRUPT_IN_MATCH_AT; + continue; + break; + + case OP_REPEAT_INC_NG_SG: STAT_OP_IN(OP_REPEAT_INC_NG_SG); + GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ + STACK_GET_REPEAT(mem, stkp); + si = GET_STACK_INDEX(stkp); + goto repeat_inc_ng; + break; + + case OP_PUSH_POS: STAT_OP_IN(OP_PUSH_POS); + STACK_PUSH_POS(s, sprev); + STAT_OP_OUT; + continue; + break; + + case OP_POP_POS: STAT_OP_IN(OP_POP_POS); + { + STACK_POS_END(stkp); + s = stkp->u.state.pstr; + sprev = stkp->u.state.pstr_prev; + } + STAT_OP_OUT; + continue; + break; + + case OP_PUSH_POS_NOT: STAT_OP_IN(OP_PUSH_POS_NOT); + GET_RELADDR_INC(addr, p); + STACK_PUSH_POS_NOT(p + addr, s, sprev); + STAT_OP_OUT; + continue; + break; + + case OP_FAIL_POS: STAT_OP_IN(OP_FAIL_POS); + STACK_POP_TIL_POS_NOT; + goto fail; + break; + + case OP_PUSH_STOP_BT: STAT_OP_IN(OP_PUSH_STOP_BT); + STACK_PUSH_STOP_BT; + STAT_OP_OUT; + continue; + break; + + case OP_POP_STOP_BT: STAT_OP_IN(OP_POP_STOP_BT); + STACK_STOP_BT_END; + 
STAT_OP_OUT; + continue; + break; + + case OP_LOOK_BEHIND: STAT_OP_IN(OP_LOOK_BEHIND); + GET_LENGTH_INC(tlen, p); + s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen); + if (IS_NULL(s)) goto fail; + sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); + STAT_OP_OUT; + continue; + break; + + case OP_PUSH_LOOK_BEHIND_NOT: STAT_OP_IN(OP_PUSH_LOOK_BEHIND_NOT); + GET_RELADDR_INC(addr, p); + GET_LENGTH_INC(tlen, p); + q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen); + if (IS_NULL(q)) { + /* too short case -> success. ex. /(?<!XXX)a/.match("a") + If you want to change to fail, replace following line. */ + p += addr; + /* goto fail; */ + } + else { + STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev); + s = q; + sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); + } + STAT_OP_OUT; + continue; + break; + + case OP_FAIL_LOOK_BEHIND_NOT: STAT_OP_IN(OP_FAIL_LOOK_BEHIND_NOT); + STACK_POP_TIL_LOOK_BEHIND_NOT; + goto fail; + break; + +#ifdef USE_SUBEXP_CALL + case OP_CALL: STAT_OP_IN(OP_CALL); + GET_ABSADDR_INC(addr, p); + STACK_PUSH_CALL_FRAME(p); + p = reg->p + addr; + STAT_OP_OUT; + continue; + break; + + case OP_RETURN: STAT_OP_IN(OP_RETURN); + STACK_RETURN(p); + STACK_PUSH_RETURN; + STAT_OP_OUT; + continue; + break; +#endif + + case OP_FINISH: + goto finish; + break; + + fail: + STAT_OP_OUT; + /* fall */ + case OP_FAIL: STAT_OP_IN(OP_FAIL); + STACK_POP; + p = stk->u.state.pcode; + s = stk->u.state.pstr; + sprev = stk->u.state.pstr_prev; + STAT_OP_OUT; + continue; + break; + + default: + goto bytecode_error; + + } /* end of switch */ + sprev = sbegin; + } /* end of while(1) */ + + finish: + STACK_SAVE; + return best_len; + +#ifdef ONIG_DEBUG + stack_error: + STACK_SAVE; + return ONIGERR_STACK_BUG; +#endif + + bytecode_error: + STACK_SAVE; + return ONIGERR_UNDEFINED_BYTECODE; + + unexpected_bytecode_error: + STACK_SAVE; + return ONIGERR_UNEXPECTED_BYTECODE; +} + + +static UChar* +slow_search(OnigEncoding enc, UChar* target, UChar* target_end, + 
const UChar* text, const UChar* text_end, UChar* text_range) +{ + UChar *t, *p, *s, *end; + + end = (UChar* )text_end; + end -= target_end - target - 1; + if (end > text_range) + end = text_range; + + s = (UChar* )text; + + while (s < end) { + if (*s == *target) { + p = s + 1; + t = target + 1; + while (t < target_end) { + if (*t != *p++) + break; + t++; + } + if (t == target_end) + return s; + } + s += enc_len(enc, s); + } + + return (UChar* )NULL; +} + +static int +str_lower_case_match(OnigEncoding enc, int ambig_flag, + const UChar* t, const UChar* tend, + const UChar* p, const UChar* end) +{ + int lowlen; + UChar *q, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN]; + const UChar* tsave; + const UChar* psave; + + tsave = t; + psave = p; + + retry: + while (t < tend) { + lowlen = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &p, end, lowbuf); + q = lowbuf; + while (lowlen > 0) { + if (*t++ != *q++) { + if ((ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { + ambig_flag &= ~ONIGENC_AMBIGUOUS_MATCH_COMPOUND; + t = tsave; + p = psave; + goto retry; + } + else + return 0; + } + lowlen--; + } + } + + return 1; +} + +static UChar* +slow_search_ic(OnigEncoding enc, int ambig_flag, + UChar* target, UChar* target_end, + const UChar* text, const UChar* text_end, UChar* text_range) +{ + UChar *s, *end; + + end = (UChar* )text_end; + end -= target_end - target - 1; + if (end > text_range) + end = text_range; + + s = (UChar* )text; + + while (s < end) { + if (str_lower_case_match(enc, ambig_flag, target, target_end, s, text_end)) + return s; + + s += enc_len(enc, s); + } + + return (UChar* )NULL; +} + +static UChar* +slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end, + const UChar* text, const UChar* adjust_text, + const UChar* text_end, const UChar* text_start) +{ + UChar *t, *p, *s; + + s = (UChar* )text_end; + s -= (target_end - target); + if (s > text_start) + s = (UChar* )text_start; + else + s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s); + + while 
(s >= text) { + if (*s == *target) { + p = s + 1; + t = target + 1; + while (t < target_end) { + if (*t != *p++) + break; + t++; + } + if (t == target_end) + return s; + } + s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s); + } + + return (UChar* )NULL; +} + +static UChar* +slow_search_backward_ic(OnigEncoding enc, int ambig_flag, + UChar* target, UChar* target_end, + const UChar* text, const UChar* adjust_text, + const UChar* text_end, const UChar* text_start) +{ + UChar *s; + + s = (UChar* )text_end; + s -= (target_end - target); + if (s > text_start) + s = (UChar* )text_start; + else + s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s); + + while (s >= text) { + if (str_lower_case_match(enc, ambig_flag, + target, target_end, s, text_end)) + return s; + + s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s); + } + + return (UChar* )NULL; +} + +static UChar* +bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, + const UChar* text, const UChar* text_end, + const UChar* text_range) +{ + const UChar *s, *t, *p, *end; + const UChar *tail; + int skip; + +#ifdef ONIG_DEBUG_SEARCH + fprintf(stderr, "bm_search_notrev: text: %d, text_end: %d, text_range: %d\n", + (int )text, (int )text_end, (int )text_range); +#endif + + end = text_range + (target_end - target) - 1; + if (end > text_end) + end = text_end; + + tail = target_end - 1; + s = text; + while ((s - text) < target_end - target) { + s += enc_len(reg->enc, s); + } + s--; /* set to text check tail position. 
*/ + + if (IS_NULL(reg->int_map)) { + while (s < end) { + p = s; + t = tail; + while (t >= target && *p == *t) { + p--; t--; + } + if (t < target) return (UChar* )(p + 1); + + skip = reg->map[*s]; + p = s + 1; + if (p >= text_end) return (UChar* )NULL; + t = p; + do { + p += enc_len(reg->enc, p); + } while ((p - t) < skip && p < text_end); + + s += (p - t); + } + } + else { + while (s < end) { + p = s; + t = tail; + while (t >= target && *p == *t) { + p--; t--; + } + if (t < target) return (UChar* )(p + 1); + + skip = reg->int_map[*s]; + p = s + 1; + if (p >= text_end) return (UChar* )NULL; + t = p; + do { + p += enc_len(reg->enc, p); + } while ((p - t) < skip && p < text_end); + + s += (p - t); + } + } + return (UChar* )NULL; +} + +static UChar* +bm_search(regex_t* reg, const UChar* target, const UChar* target_end, + const UChar* text, const UChar* text_end, const UChar* text_range) +{ + const UChar *s, *t, *p, *end; + const UChar *tail; + + end = text_range + (target_end - target) - 1; + if (end > text_end) + end = text_end; + + tail = target_end - 1; + s = text + (target_end - target) - 1; + if (IS_NULL(reg->int_map)) { + while (s < end) { + p = s; + t = tail; + while (t >= target && *p == *t) { + p--; t--; + } + if (t < target) return (UChar* )(p + 1); + s += reg->map[*s]; + } + } + else { /* see int_map[] */ + while (s < end) { + p = s; + t = tail; + while (t >= target && *p == *t) { + p--; t--; + } + if (t < target) return (UChar* )(p + 1); + s += reg->int_map[*s]; + } + } + return (UChar* )NULL; +} + +static int +set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc, int** skip) + +{ + int i, len; + + if (IS_NULL(*skip)) { + *skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE); + if (IS_NULL(*skip)) return ONIGERR_MEMORY; + } + + len = end - s; + for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) + (*skip)[i] = len; + + for (i = len - 1; i > 0; i--) + (*skip)[s[i]] = i; + + return 0; +} + +static UChar* +bm_search_backward(regex_t* reg, const UChar* 
target, const UChar* target_end, + const UChar* text, const UChar* adjust_text, + const UChar* text_end, const UChar* text_start) +{ + const UChar *s, *t, *p; + + s = text_end - (target_end - target); + if (text_start < s) + s = text_start; + else + s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s); + + while (s >= text) { + p = s; + t = target; + while (t < target_end && *p == *t) { + p++; t++; + } + if (t == target_end) + return (UChar* )s; + + s -= reg->int_map_backward[*s]; + s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s); + } + + return (UChar* )NULL; +} + +static UChar* +map_search(OnigEncoding enc, UChar map[], + const UChar* text, const UChar* text_range) +{ + const UChar *s = text; + + while (s < text_range) { + if (map[*s]) return (UChar* )s; + + s += enc_len(enc, s); + } + return (UChar* )NULL; +} + +static UChar* +map_search_backward(OnigEncoding enc, UChar map[], + const UChar* text, const UChar* adjust_text, + const UChar* text_start) +{ + const UChar *s = text_start; + + while (s >= text) { + if (map[*s]) return (UChar* )s; + + s = onigenc_get_prev_char_head(enc, adjust_text, s); + } + return (UChar* )NULL; +} + +extern int +onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region, + OnigOptionType option) +{ + int r; + UChar *prev; + MatchArg msa; + +#ifdef USE_MULTI_THREAD_SYSTEM + if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) { + ONIG_STATE_INC(reg); + if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) { + onig_chain_reduce(reg); + ONIG_STATE_INC(reg); + } + } + else { + int n = 0; + while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) { + if (++n > THREAD_PASS_LIMIT_COUNT) + return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT; + THREAD_PASS; + } + ONIG_STATE_INC(reg); + } +#endif /* USE_MULTI_THREAD_SYSTEM */ + + MATCH_ARG_INIT(msa, option, region, at); + + if (region +#ifdef USE_POSIX_REGION_OPTION + && !IS_POSIX_REGION(option) +#endif + ) { + r = onig_region_resize_clear(region, 
reg->num_mem + 1); + } + else + r = 0; + + if (r == 0) { + prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at); + r = match_at(reg, str, end, at, prev, &msa); + } + + MATCH_ARG_FREE(msa); + ONIG_STATE_DEC(reg); + return r; +} + +static int +forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, + UChar* range, UChar** low, UChar** high, UChar** low_prev) +{ + UChar *p, *pprev = (UChar* )NULL; + +#ifdef ONIG_DEBUG_SEARCH + fprintf(stderr, "forward_search_range: str: %d, end: %d, s: %d, range: %d\n", + (int )str, (int )end, (int )s, (int )range); +#endif + + p = s; + if (reg->dmin > 0) { + if (ONIGENC_IS_SINGLEBYTE(reg->enc)) { + p += reg->dmin; + } + else { + UChar *q = p + reg->dmin; + while (p < q) p += enc_len(reg->enc, p); + } + } + + retry: + switch (reg->optimize) { + case ONIG_OPTIMIZE_EXACT: + p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range); + break; + case ONIG_OPTIMIZE_EXACT_IC: + p = slow_search_ic(reg->enc, reg->ambig_flag, + reg->exact, reg->exact_end, p, end, range); + break; + + case ONIG_OPTIMIZE_EXACT_BM: + p = bm_search(reg, reg->exact, reg->exact_end, p, end, range); + break; + + case ONIG_OPTIMIZE_EXACT_BM_NOT_REV: + p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range); + break; + + case ONIG_OPTIMIZE_MAP: + p = map_search(reg->enc, reg->map, p, range); + break; + } + + if (p && p < range) { + if (p - reg->dmin < s) { + retry_gate: + pprev = p; + p += enc_len(reg->enc, p); + goto retry; + } + + if (reg->sub_anchor) { + UChar* prev; + + switch (reg->sub_anchor) { + case ANCHOR_BEGIN_LINE: + if (!ON_STR_BEGIN(p)) { + prev = onigenc_get_prev_char_head(reg->enc, + (pprev ? pprev : str), p); + if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) + goto retry_gate; + } + break; + + case ANCHOR_END_LINE: + if (ON_STR_END(p)) { + prev = (UChar* )onigenc_get_prev_char_head(reg->enc, + (pprev ? 
pprev : str), p); + if (prev && ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) + goto retry_gate; + } + else if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)) + goto retry_gate; + break; + } + } + + if (reg->dmax == 0) { + *low = p; + if (low_prev) { + if (*low > s) + *low_prev = onigenc_get_prev_char_head(reg->enc, s, p); + else + *low_prev = onigenc_get_prev_char_head(reg->enc, + (pprev ? pprev : str), p); + } + } + else { + if (reg->dmax != ONIG_INFINITE_DISTANCE) { + *low = p - reg->dmax; + if (*low > s) { + *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s, + *low, (const UChar** )low_prev); + if (low_prev && IS_NULL(*low_prev)) + *low_prev = onigenc_get_prev_char_head(reg->enc, + (pprev ? pprev : s), *low); + } + else { + if (low_prev) + *low_prev = onigenc_get_prev_char_head(reg->enc, + (pprev ? pprev : str), *low); + } + } + } + /* no needs to adjust *high, *high is used as range check only */ + *high = p - reg->dmin; + +#ifdef ONIG_DEBUG_SEARCH + fprintf(stderr, + "forward_search_range success: low: %d, high: %d, dmin: %d, dmax: %d\n", + (int )(*low - str), (int )(*high - str), reg->dmin, reg->dmax); +#endif + return 1; /* success */ + } + + return 0; /* fail */ +} + +static int set_bm_backward_skip P_((UChar* s, UChar* end, OnigEncoding enc, + int** skip)); + +#define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100 + +static int +backward_search_range(regex_t* reg, const UChar* str, const UChar* end, + UChar* s, const UChar* range, UChar* adjrange, + UChar** low, UChar** high) +{ + int r; + UChar *p; + + range += reg->dmin; + p = s; + + retry: + switch (reg->optimize) { + case ONIG_OPTIMIZE_EXACT: + exact_method: + p = slow_search_backward(reg->enc, reg->exact, reg->exact_end, + range, adjrange, end, p); + break; + + case ONIG_OPTIMIZE_EXACT_IC: + p = slow_search_backward_ic(reg->enc, reg->ambig_flag, + reg->exact, reg->exact_end, + range, adjrange, end, p); + break; + + case ONIG_OPTIMIZE_EXACT_BM: + case ONIG_OPTIMIZE_EXACT_BM_NOT_REV: + if 
(IS_NULL(reg->int_map_backward)) { + if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD) + goto exact_method; + + r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc, + &(reg->int_map_backward)); + if (r) return r; + } + p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange, + end, p); + break; + + case ONIG_OPTIMIZE_MAP: + p = map_search_backward(reg->enc, reg->map, range, adjrange, p); + break; + } + + if (p) { + if (reg->sub_anchor) { + UChar* prev; + + switch (reg->sub_anchor) { + case ANCHOR_BEGIN_LINE: + if (!ON_STR_BEGIN(p)) { + prev = onigenc_get_prev_char_head(reg->enc, adjrange, p); + if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) { + p = prev; + goto retry; + } + } + break; + + case ANCHOR_END_LINE: + if (ON_STR_END(p)) { + prev = onigenc_get_prev_char_head(reg->enc, adjrange, p); + if (IS_NULL(prev)) goto fail; + if (ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) { + p = prev; + goto retry; + } + } + else if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)) { + p = onigenc_get_prev_char_head(reg->enc, adjrange, p); + if (IS_NULL(p)) goto fail; + goto retry; + } + break; + } + } + + /* no needs to adjust *high, *high is used as range check only */ + if (reg->dmax != ONIG_INFINITE_DISTANCE) { + *low = p - reg->dmax; + *high = p - reg->dmin; + *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high); + } + +#ifdef ONIG_DEBUG_SEARCH + fprintf(stderr, "backward_search_range: low: %d, high: %d\n", + (int )(*low - str), (int )(*high - str)); +#endif + return 1; /* success */ + } + + fail: +#ifdef ONIG_DEBUG_SEARCH + fprintf(stderr, "backward_search_range: fail.\n"); +#endif + return 0; /* fail */ +} + + +extern int +onig_search(regex_t* reg, const UChar* str, const UChar* end, + const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option) +{ + int r; + UChar *s, *prev; + MatchArg msa; + +#ifdef USE_MULTI_THREAD_SYSTEM + if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) { + ONIG_STATE_INC(reg); + if 
(IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) { + onig_chain_reduce(reg); + ONIG_STATE_INC(reg); + } + } + else { + int n = 0; + while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) { + if (++n > THREAD_PASS_LIMIT_COUNT) + return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT; + THREAD_PASS; + } + ONIG_STATE_INC(reg); + } +#endif /* USE_MULTI_THREAD_SYSTEM */ + +#ifdef ONIG_DEBUG_SEARCH + fprintf(stderr, + "onig_search (entry point): str: %d, end: %d, start: %d, range: %d\n", + (int )str, (int )(end - str), (int )(start - str), (int )(range - str)); +#endif + + if (region +#ifdef USE_POSIX_REGION_OPTION + && !IS_POSIX_REGION(option) +#endif + ) { + r = onig_region_resize_clear(region, reg->num_mem + 1); + if (r) goto finish_no_msa; + } + + if (start > end || start < str) goto mismatch_no_msa; + +#define MATCH_AND_RETURN_CHECK \ + r = match_at(reg, str, end, s, prev, &msa);\ + if (r != ONIG_MISMATCH) {\ + if (r >= 0) goto match;\ + goto finish; /* error */ \ + } + + /* anchor optimize: resume search range */ + if (reg->anchor != 0 && str < end) { + UChar* semi_end; + + if (reg->anchor & ANCHOR_BEGIN_POSITION) { + /* search start-position only */ + begin_position: + if (range > start) + range = start + 1; + else + range = start; + } + else if (reg->anchor & ANCHOR_BEGIN_BUF) { + /* search str-position only */ + if (range > start) { + if (start != str) goto mismatch_no_msa; + range = str + 1; + } + else { + if (range <= str) { + start = str; + range = str; + } + else + goto mismatch_no_msa; + } + } + else if (reg->anchor & ANCHOR_END_BUF) { + semi_end = (UChar* )end; + + end_buf: + if ((OnigDistance )(semi_end - str) < reg->anchor_dmin) + goto mismatch_no_msa; + + if (range > start) { + if ((OnigDistance )(semi_end - start) > reg->anchor_dmax) { + start = semi_end - reg->anchor_dmax; + if (start < end) + start = onigenc_get_right_adjust_char_head(reg->enc, str, start); + else { /* match with empty at end */ + start = onigenc_get_prev_char_head(reg->enc, str, end); + 
} + } + if ((OnigDistance )(semi_end - (range - 1)) < reg->anchor_dmin) { + range = semi_end - reg->anchor_dmin + 1; + } + + if (start >= range) goto mismatch_no_msa; + } + else { + if ((OnigDistance )(semi_end - range) > reg->anchor_dmax) { + range = semi_end - reg->anchor_dmax; + } + if ((OnigDistance )(semi_end - start) < reg->anchor_dmin) { + start = semi_end - reg->anchor_dmin; + start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start); + if (range > start) goto mismatch_no_msa; + } + } + } + else if (reg->anchor & ANCHOR_SEMI_END_BUF) { + UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, 1); + + if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) { + semi_end = pre_end; + if (semi_end > str && start <= semi_end) { + goto end_buf; + } + } + else { + semi_end = (UChar* )end; + goto end_buf; + } + } + else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_PL)) { + goto begin_position; + } + } + else if (str == end) { /* empty string */ + static const UChar* address_for_empty_string = ""; + +#ifdef ONIG_DEBUG_SEARCH + fprintf(stderr, "onig_search: empty string.\n"); +#endif + + if (reg->threshold_len == 0) { + start = end = str = address_for_empty_string; + s = (UChar* )start; + prev = (UChar* )NULL; + + MATCH_ARG_INIT(msa, option, region, start); + MATCH_AND_RETURN_CHECK; + goto mismatch; + } + goto mismatch_no_msa; + } + +#ifdef ONIG_DEBUG_SEARCH + fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n", + (int )(end - str), (int )(start - str), (int )(range - str)); +#endif + + MATCH_ARG_INIT(msa, option, region, start); + + s = (UChar* )start; + if (range > start) { /* forward search */ + if (s > str) + prev = onigenc_get_prev_char_head(reg->enc, str, s); + else + prev = (UChar* )NULL; + + if (reg->optimize != ONIG_OPTIMIZE_NONE) { + UChar *sch_range, *low, *high, *low_prev; + + sch_range = (UChar* )range; + if (reg->dmax != 0) { + if (reg->dmax == ONIG_INFINITE_DISTANCE) + sch_range = (UChar* )end; + else { + sch_range += reg->dmax; + if 
(sch_range > end) sch_range = (UChar* )end; + } + } + if (reg->dmax != ONIG_INFINITE_DISTANCE && + (end - start) >= reg->threshold_len) { + do { + if (! forward_search_range(reg, str, end, s, sch_range, + &low, &high, &low_prev)) goto mismatch; + if (s < low) { + s = low; + prev = low_prev; + } + while (s <= high) { + MATCH_AND_RETURN_CHECK; + prev = s; + s += enc_len(reg->enc, s); + } + if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) { + if (IS_NOT_NULL(prev)) { + while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && + s < range) { + prev = s; + s += enc_len(reg->enc, s); + } + } + } + } while (s < range); + goto mismatch; + } + else { /* check only. */ + if ((end - start) < reg->threshold_len || + ! forward_search_range(reg, str, end, s, sch_range, + &low, &high, (UChar** )NULL)) goto mismatch; + } + } + + do { + MATCH_AND_RETURN_CHECK; + prev = s; + s += enc_len(reg->enc, s); + } while (s <= range); /* exec s == range, because empty match with /$/. */ + } + else { /* backward search */ + if (reg->optimize != ONIG_OPTIMIZE_NONE) { + UChar *low, *high, *adjrange, *sch_start; + + if (range < end) + adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range); + else + adjrange = (UChar* )end; + + if (reg->dmax != ONIG_INFINITE_DISTANCE && + (end - range) >= reg->threshold_len) { + do { + sch_start = s + reg->dmax; + if (sch_start > end) sch_start = (UChar* )end; + if (backward_search_range(reg, str, end, sch_start, range, adjrange, + &low, &high) <= 0) + goto mismatch; + + if (s > high) + s = high; + + while (s >= low) { + prev = onigenc_get_prev_char_head(reg->enc, str, s); + MATCH_AND_RETURN_CHECK; + s = prev; + } + } while (s >= range); + goto mismatch; + } + else { /* check only. 
*/ + if ((end - range) < reg->threshold_len) goto mismatch; + + sch_start = s; + if (reg->dmax != 0) { + if (reg->dmax == ONIG_INFINITE_DISTANCE) + sch_start = (UChar* )end; + else { + sch_start += reg->dmax; + if (sch_start > end) sch_start = (UChar* )end; + else + sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, + start, sch_start); + } + } + if (backward_search_range(reg, str, end, sch_start, range, adjrange, + &low, &high) <= 0) goto mismatch; + } + } + + do { + prev = onigenc_get_prev_char_head(reg->enc, str, s); + MATCH_AND_RETURN_CHECK; + s = prev; + } while (s >= range); + } + + mismatch: + r = ONIG_MISMATCH; + + finish: + MATCH_ARG_FREE(msa); + ONIG_STATE_DEC(reg); + + /* If result is mismatch and no FIND_NOT_EMPTY option, + then the region is not setted in match_at(). */ + if (IS_FIND_NOT_EMPTY(reg->options) && region +#ifdef USE_POSIX_REGION_OPTION + && !IS_POSIX_REGION(option) +#endif + ) { + onig_region_clear(region); + } + +#ifdef ONIG_DEBUG + if (r != ONIG_MISMATCH) + fprintf(stderr, "onig_search: error %d\n", r); +#endif + return r; + + mismatch_no_msa: + r = ONIG_MISMATCH; + finish_no_msa: + ONIG_STATE_DEC(reg); +#ifdef ONIG_DEBUG + if (r != ONIG_MISMATCH) + fprintf(stderr, "onig_search: error %d\n", r); +#endif + return r; + + match: + ONIG_STATE_DEC(reg); + MATCH_ARG_FREE(msa); + return s - str; +} + +extern OnigEncoding +onig_get_encoding(regex_t* reg) +{ + return reg->enc; +} + +extern OnigOptionType +onig_get_options(regex_t* reg) +{ + return reg->options; +} + +extern OnigAmbigType +onig_get_ambig_flag(regex_t* reg) +{ + return reg->ambig_flag; +} + +extern OnigSyntaxType* +onig_get_syntax(regex_t* reg) +{ + return reg->syntax; +} + +extern int +onig_number_of_captures(regex_t* reg) +{ + return reg->num_mem; +} + +extern int +onig_number_of_capture_histories(regex_t* reg) +{ +#ifdef USE_CAPTURE_HISTORY + int i, n; + + n = 0; + for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) { + if (BIT_STATUS_AT(reg->capture_history, i) != 0) + n++; 
+ } + return n; +#else + return 0; +#endif +} + +extern void +onig_copy_encoding(OnigEncoding to, OnigEncoding from) +{ + *to = *from; +} + +/********************************************************************** + regparse.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regparse.h" + +#define WARN_BUFSIZE 256 + +OnigSyntaxType OnigSyntaxRuby = { + (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY | + ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 | + ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS | + ONIG_SYN_OP_ESC_C_CONTROL ) + & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END ) + , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT | + ONIG_SYN_OP2_OPTION_RUBY | + ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF | + ONIG_SYN_OP2_ESC_G_SUBEXP_CALL | + ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT | + ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL | + ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB | + ONIG_SYN_OP2_ESC_H_XDIGIT ) + , ( SYN_GNU_REGEX_BV | + ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV | + ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND | + ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP | + ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME | + ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY | + ONIG_SYN_WARN_CC_OP_NOT_ESCAPED | + ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT ) + , ONIG_OPTION_NONE +}; + +OnigSyntaxType* OnigDefaultSyntax = ONIG_SYNTAX_RUBY; + +extern void onig_null_warn(const char* s, ...) 
{ } + +#ifdef DEFAULT_WARN_FUNCTION +static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION; +#else +static OnigWarnFunc onig_warn = onig_null_warn; +#endif + +#ifdef DEFAULT_VERB_WARN_FUNCTION +static OnigWarnFunc onig_verb_warn = (OnigWarnFunc )DEFAULT_VERB_WARN_FUNCTION; +#else +static OnigWarnFunc onig_verb_warn = onig_null_warn; +#endif + +extern void onig_set_warn_func(OnigWarnFunc f) +{ + onig_warn = f; +} + +extern void onig_set_verb_warn_func(OnigWarnFunc f) +{ + onig_verb_warn = f; +} + +static void +bbuf_free(BBuf* bbuf) +{ + if (IS_NOT_NULL(bbuf)) { + if (IS_NOT_NULL(bbuf->p)) xfree(bbuf->p); + xfree(bbuf); + } +} + +static int +bbuf_clone(BBuf** rto, BBuf* from) +{ + int r; + BBuf *to; + + *rto = to = (BBuf* )xmalloc(sizeof(BBuf)); + CHECK_NULL_RETURN_VAL(to, ONIGERR_MEMORY); + r = BBUF_INIT(to, from->alloc); + if (r != 0) return r; + to->used = from->used; + xmemcpy(to->p, from->p, from->used); + return 0; +} + +#define ONOFF(v,f,negative) (negative) ? ((v) &= ~(f)) : ((v) |= (f)) + +#define MBCODE_START_POS(enc) \ + (OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ? 0 : 0x80) + +#define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf) \ + add_code_range_to_buf(pbuf, MBCODE_START_POS(enc), ~((OnigCodePoint )0)) + +#define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\ + if (! 
ONIGENC_IS_SINGLEBYTE(enc)) {\ + r = SET_ALL_MULTI_BYTE_RANGE(enc, &(mbuf));\ + if (r) return r;\ + }\ +} while (0) + + +#define BITSET_IS_EMPTY(bs,empty) do {\ + int i;\ + empty = 1;\ + for (i = 0; i < BITSET_SIZE; i++) {\ + if ((bs)[i] != 0) {\ + empty = 0; break;\ + }\ + }\ +} while (0) + +static void +bitset_set_range(BitSetRef bs, int from, int to) +{ + int i; + for (i = from; i <= to && i < SINGLE_BYTE_SIZE; i++) { + BITSET_SET_BIT(bs, i); + } +} + +#if 0 +static void +bitset_set_all(BitSetRef bs) +{ + int i; + for (i = 0; i < BITSET_SIZE; i++) { + bs[i] = ~((Bits )0); + } +} +#endif + +static void +bitset_invert(BitSetRef bs) +{ + int i; + for (i = 0; i < BITSET_SIZE; i++) { + bs[i] = ~(bs[i]); + } +} + +static void +bitset_invert_to(BitSetRef from, BitSetRef to) +{ + int i; + for (i = 0; i < BITSET_SIZE; i++) { + to[i] = ~(from[i]); + } +} + +static void +bitset_and(BitSetRef dest, BitSetRef bs) +{ + int i; + for (i = 0; i < BITSET_SIZE; i++) { + dest[i] &= bs[i]; + } +} + +static void +bitset_or(BitSetRef dest, BitSetRef bs) +{ + int i; + for (i = 0; i < BITSET_SIZE; i++) { + dest[i] |= bs[i]; + } +} + +static void +bitset_copy(BitSetRef dest, BitSetRef bs) +{ + int i; + for (i = 0; i < BITSET_SIZE; i++) { + dest[i] = bs[i]; + } +} + +extern int +onig_strncmp(const UChar* s1, const UChar* s2, int n) +{ + int x; + + while (n-- > 0) { + x = *s2++ - *s1++; + if (x) return x; + } + return 0; +} + +static void +k_strcpy(UChar* dest, const UChar* src, const UChar* end) +{ + int len = end - src; + if (len > 0) { + xmemcpy(dest, src, len); + dest[len] = (UChar )0; + } +} + +static UChar* +strdup_with_null(OnigEncoding enc, UChar* s, UChar* end) +{ + int slen, term_len, i; + UChar *r; + + slen = end - s; + term_len = ONIGENC_MBC_MINLEN(enc); + + r = (UChar* )xmalloc(slen + term_len); + CHECK_NULL_RETURN(r); + xmemcpy(r, s, slen); + + for (i = 0; i < term_len; i++) + r[slen + i] = (UChar )0; + + return r; +} + + +/* scan pattern methods */ +#define PEND_VALUE 0 + 
+#define PFETCH_READY UChar* pfetch_prev +#define PEND (p < end ? 0 : 1) +#define PUNFETCH p = pfetch_prev +#define PINC do { \ + pfetch_prev = p; \ + p += ONIGENC_MBC_ENC_LEN(enc, p); \ +} while (0) +#define PFETCH(c) do { \ + c = ONIGENC_MBC_TO_CODE(enc, p, end); \ + pfetch_prev = p; \ + p += ONIGENC_MBC_ENC_LEN(enc, p); \ +} while (0) + +#define PPEEK (p < end ? ONIGENC_MBC_TO_CODE(enc, p, end) : PEND_VALUE) +#define PPEEK_IS(c) (PPEEK == (OnigCodePoint )c) + +static UChar* +k_strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end, + int capa) +{ + UChar* r; + + if (dest) + r = (UChar* )xrealloc(dest, capa + 1); + else + r = (UChar* )xmalloc(capa + 1); + + CHECK_NULL_RETURN(r); + k_strcpy(r + (dest_end - dest), src, src_end); + return r; +} + +/* dest on static area */ +static UChar* +strcat_capa_from_static(UChar* dest, UChar* dest_end, + const UChar* src, const UChar* src_end, int capa) +{ + UChar* r; + + r = (UChar* )xmalloc(capa + 1); + CHECK_NULL_RETURN(r); + k_strcpy(r, dest, dest_end); + k_strcpy(r + (dest_end - dest), src, src_end); + return r; +} + +#ifdef USE_NAMED_GROUP + +#define INIT_NAME_BACKREFS_ALLOC_NUM 8 + +typedef struct { + UChar* name; + int name_len; /* byte length */ + int back_num; /* number of backrefs */ + int back_alloc; + int back_ref1; + int* back_refs; +} NameEntry; + +#ifdef USE_ST_HASH_TABLE + +#include "st.h" + +typedef struct { + unsigned char* s; + unsigned char* end; +} st_strend_key; + +static int strend_cmp(st_strend_key*, st_strend_key*); +static int strend_hash(st_strend_key*); + +static struct st_hash_type type_strend_hash = { + strend_cmp, + strend_hash, +}; + +static st_table* +onig_st_init_strend_table_with_size(int size) +{ + return onig_st_init_table_with_size(&type_strend_hash, size); +} + +static int +onig_st_lookup_strend(st_table *table, const UChar* str_key, const UChar* end_key, st_data_t *value) +{ + st_strend_key key; + + key.s = (unsigned char* )str_key; + key.end = (unsigned char* 
)end_key; + + return onig_st_lookup(table, (st_data_t )(&key), value); +} + +static int +onig_st_insert_strend(st_table *table, const UChar* str_key, const UChar* end_key, st_data_t value) +{ + st_strend_key* key; + int result; + + key = (st_strend_key* )xmalloc(sizeof(st_strend_key)); + key->s = (unsigned char* )str_key; + key->end = (unsigned char* )end_key; + result = onig_st_insert(table, (st_data_t )key, value); + if (result) { + xfree(key); + } + return result; +} + +static int +strend_cmp(st_strend_key* x, st_strend_key* y) +{ + unsigned char *p, *q; + int c; + + if ((x->end - x->s) != (y->end - y->s)) + return 1; + + p = x->s; + q = y->s; + while (p < x->end) { + c = (int )*p - (int )*q; + if (c != 0) return c; + + p++; q++; + } + + return 0; +} + +static int +strend_hash(st_strend_key* x) +{ + int val; + unsigned char *p; + + val = 0; + p = x->s; + while (p < x->end) { + val = val * 997 + (int )*p++; + } + + return val + (val >> 5); +} + +typedef st_table NameTable; +typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */ + +#define NAMEBUF_SIZE 24 +#define NAMEBUF_SIZE_1 25 + +#ifdef ONIG_DEBUG +static int +i_print_name_entry(UChar* key, NameEntry* e, void* arg) +{ + int i; + FILE* fp = (FILE* )arg; + + fprintf(fp, "%s: ", e->name); + if (e->back_num == 0) + fputs("-", fp); + else if (e->back_num == 1) + fprintf(fp, "%d", e->back_ref1); + else { + for (i = 0; i < e->back_num; i++) { + if (i > 0) fprintf(fp, ", "); + fprintf(fp, "%d", e->back_refs[i]); + } + } + fputs("\n", fp); + return ST_CONTINUE; +} + +extern int +onig_print_names(FILE* fp, regex_t* reg) +{ + NameTable* t = (NameTable* )reg->name_table; + + if (IS_NOT_NULL(t)) { + fprintf(fp, "name table\n"); + onig_st_foreach(t, i_print_name_entry, (HashDataType )fp); + fputs("\n", fp); + } + return 0; +} +#endif + +static int +i_free_name_entry(UChar* key, NameEntry* e, void* arg) +{ + xfree(e->name); + if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs); + xfree(key); + xfree(e); 
+ return ST_DELETE; +} + +static int +names_clear(regex_t* reg) +{ + NameTable* t = (NameTable* )reg->name_table; + + if (IS_NOT_NULL(t)) { + onig_st_foreach(t, i_free_name_entry, 0); + } + return 0; +} + +extern int +onig_names_free(regex_t* reg) +{ + int r; + NameTable* t; + + r = names_clear(reg); + if (r) return r; + + t = (NameTable* )reg->name_table; + if (IS_NOT_NULL(t)) onig_st_free_table(t); + reg->name_table = (void* )NULL; + return 0; +} + +static NameEntry* +name_find(regex_t* reg, const UChar* name, const UChar* name_end) +{ + NameEntry* e; + NameTable* t = (NameTable* )reg->name_table; + + e = (NameEntry* )NULL; + if (IS_NOT_NULL(t)) { + onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e))); + } + return e; +} + +typedef struct { + int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*); + regex_t* reg; + void* arg; + int ret; + OnigEncoding enc; +} INamesArg; + +static int +i_names(UChar* key, NameEntry* e, INamesArg* arg) +{ + int r = (*(arg->func))(e->name, + /*e->name + onigenc_str_bytelen_null(arg->enc, e->name), */ + e->name + e->name_len, + e->back_num, + (e->back_num > 1 ? e->back_refs : &(e->back_ref1)), + arg->reg, arg->arg); + if (r != 0) { + arg->ret = r; + return ST_STOP; + } + return ST_CONTINUE; +} + +extern int +onig_foreach_name(regex_t* reg, + int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), + void* arg) +{ + INamesArg narg; + NameTable* t = (NameTable* )reg->name_table; + + narg.ret = 0; + if (IS_NOT_NULL(t)) { + narg.func = func; + narg.reg = reg; + narg.arg = arg; + narg.enc = reg->enc; /* should be pattern encoding. 
*/ + onig_st_foreach(t, i_names, (HashDataType )&narg); + } + return narg.ret; +} + +static int +i_renumber_name(UChar* key, NameEntry* e, GroupNumRemap* map) +{ + int i; + + if (e->back_num > 1) { + for (i = 0; i < e->back_num; i++) { + e->back_refs[i] = map[e->back_refs[i]].new_val; + } + } + else if (e->back_num == 1) { + e->back_ref1 = map[e->back_ref1].new_val; + } + + return ST_CONTINUE; +} + +extern int +onig_renumber_name_table(regex_t* reg, GroupNumRemap* map) +{ + NameTable* t = (NameTable* )reg->name_table; + + if (IS_NOT_NULL(t)) { + onig_st_foreach(t, i_renumber_name, (HashDataType )map); + } + return 0; +} + + +extern int +onig_number_of_names(regex_t* reg) +{ + NameTable* t = (NameTable* )reg->name_table; + + if (IS_NOT_NULL(t)) + return t->num_entries; + else + return 0; +} + +#else /* USE_ST_HASH_TABLE */ + +#define INIT_NAMES_ALLOC_NUM 8 + +typedef struct { + NameEntry* e; + int num; + int alloc; +} NameTable; + + +#ifdef ONIG_DEBUG +extern int +onig_print_names(FILE* fp, regex_t* reg) +{ + int i, j; + NameEntry* e; + NameTable* t = (NameTable* )reg->name_table; + + if (IS_NOT_NULL(t) && t->num > 0) { + fprintf(fp, "name table\n"); + for (i = 0; i < t->num; i++) { + e = &(t->e[i]); + fprintf(fp, "%s: ", e->name); + if (e->back_num == 0) { + fputs("-", fp); + } + else if (e->back_num == 1) { + fprintf(fp, "%d", e->back_ref1); + } + else { + for (j = 0; j < e->back_num; j++) { + if (j > 0) fprintf(fp, ", "); + fprintf(fp, "%d", e->back_refs[j]); + } + } + fputs("\n", fp); + } + fputs("\n", fp); + } + return 0; +} +#endif + +static int +names_clear(regex_t* reg) +{ + int i; + NameEntry* e; + NameTable* t = (NameTable* )reg->name_table; + + if (IS_NOT_NULL(t)) { + for (i = 0; i < t->num; i++) { + e = &(t->e[i]); + if (IS_NOT_NULL(e->name)) { + xfree(e->name); + e->name = NULL; + e->name_len = 0; + e->back_num = 0; + e->back_alloc = 0; + if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs); + e->back_refs = (int* )NULL; + } + } + if (IS_NOT_NULL(t->e)) { 
+ xfree(t->e); + t->e = NULL; + } + t->num = 0; + } + return 0; +} + +extern int +onig_names_free(regex_t* reg) +{ + int r; + NameTable* t; + + r = names_clear(reg); + if (r) return r; + + t = (NameTable* )reg->name_table; + if (IS_NOT_NULL(t)) xfree(t); + reg->name_table = NULL; + return 0; +} + +static NameEntry* +name_find(regex_t* reg, UChar* name, UChar* name_end) +{ + int i, len; + NameEntry* e; + NameTable* t = (NameTable* )reg->name_table; + + if (IS_NOT_NULL(t)) { + len = name_end - name; + for (i = 0; i < t->num; i++) { + e = &(t->e[i]); + if (len == e->name_len && onig_strncmp(name, e->name, len) == 0) + return e; + } + } + return (NameEntry* )NULL; +} + +extern int +onig_foreach_name(regex_t* reg, + int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), + void* arg) +{ + int i, r; + NameEntry* e; + NameTable* t = (NameTable* )reg->name_table; + + if (IS_NOT_NULL(t)) { + for (i = 0; i < t->num; i++) { + e = &(t->e[i]); + r = (*func)(e->name, e->name + e->name_len, e->back_num, + (e->back_num > 1 ? 
e->back_refs : &(e->back_ref1)), + reg, arg); + if (r != 0) return r; + } + } + return 0; +} + +extern int +onig_number_of_names(regex_t* reg) +{ + NameTable* t = (NameTable* )reg->name_table; + + if (IS_NOT_NULL(t)) + return t->num; + else + return 0; +} + +#endif /* else USE_ST_HASH_TABLE */ + +static int +name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env) +{ + int alloc; + NameEntry* e; + NameTable* t = (NameTable* )reg->name_table; + + if (name_end - name <= 0) + return ONIGERR_EMPTY_GROUP_NAME; + + e = name_find(reg, name, name_end); + if (IS_NULL(e)) { +#ifdef USE_ST_HASH_TABLE + if (IS_NULL(t)) { + t = onig_st_init_strend_table_with_size(5); + reg->name_table = (void* )t; + } + e = (NameEntry* )xmalloc(sizeof(NameEntry)); + CHECK_NULL_RETURN_VAL(e, ONIGERR_MEMORY); + + e->name = strdup_with_null(reg->enc, name, name_end); + if (IS_NULL(e->name)) return ONIGERR_MEMORY; + onig_st_insert_strend(t, e->name, (e->name + (name_end - name)), + (HashDataType )e); + + e->name_len = name_end - name; + e->back_num = 0; + e->back_alloc = 0; + e->back_refs = (int* )NULL; + +#else + + if (IS_NULL(t)) { + alloc = INIT_NAMES_ALLOC_NUM; + t = (NameTable* )xmalloc(sizeof(NameTable)); + CHECK_NULL_RETURN_VAL(t, ONIGERR_MEMORY); + t->e = NULL; + t->alloc = 0; + t->num = 0; + + t->e = (NameEntry* )xmalloc(sizeof(NameEntry) * alloc); + if (IS_NULL(t->e)) { + xfree(t); + return ONIGERR_MEMORY; + } + t->alloc = alloc; + reg->name_table = t; + goto clear; + } + else if (t->num == t->alloc) { + int i; + + alloc = t->alloc * 2; + t->e = (NameEntry* )xrealloc(t->e, sizeof(NameEntry) * alloc); + CHECK_NULL_RETURN_VAL(t->e, ONIGERR_MEMORY); + t->alloc = alloc; + + clear: + for (i = t->num; i < t->alloc; i++) { + t->e[i].name = NULL; + t->e[i].name_len = 0; + t->e[i].back_num = 0; + t->e[i].back_alloc = 0; + t->e[i].back_refs = (int* )NULL; + } + } + e = &(t->e[t->num]); + t->num++; + e->name = strdup_with_null(reg->enc, name, name_end); + e->name_len = 
name_end - name; +#endif + } + + if (e->back_num >= 1 && + ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME)) { + onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINED_NAME, + name, name_end); + return ONIGERR_MULTIPLEX_DEFINED_NAME; + } + + e->back_num++; + if (e->back_num == 1) { + e->back_ref1 = backref; + } + else { + if (e->back_num == 2) { + alloc = INIT_NAME_BACKREFS_ALLOC_NUM; + e->back_refs = (int* )xmalloc(sizeof(int) * alloc); + CHECK_NULL_RETURN_VAL(e->back_refs, ONIGERR_MEMORY); + e->back_alloc = alloc; + e->back_refs[0] = e->back_ref1; + e->back_refs[1] = backref; + } + else { + if (e->back_num > e->back_alloc) { + alloc = e->back_alloc * 2; + e->back_refs = (int* )xrealloc(e->back_refs, sizeof(int) * alloc); + CHECK_NULL_RETURN_VAL(e->back_refs, ONIGERR_MEMORY); + e->back_alloc = alloc; + } + e->back_refs[e->back_num - 1] = backref; + } + } + + return 0; +} + +extern int +onig_name_to_group_numbers(regex_t* reg, const UChar* name, + const UChar* name_end, int** nums) +{ + NameEntry* e; + + e = name_find(reg, name, name_end); + if (IS_NULL(e)) return ONIGERR_UNDEFINED_NAME_REFERENCE; + + switch (e->back_num) { + case 0: + break; + case 1: + *nums = &(e->back_ref1); + break; + default: + *nums = e->back_refs; + break; + } + return e->back_num; +} + +extern int +onig_name_to_backref_number(regex_t* reg, const UChar* name, + const UChar* name_end, OnigRegion *region) +{ + int i, n, *nums; + + n = onig_name_to_group_numbers(reg, name, name_end, &nums); + if (n < 0) + return n; + else if (n == 0) + return ONIGERR_PARSER_BUG; + else if (n == 1) + return nums[0]; + else { + if (IS_NOT_NULL(region)) { + for (i = n - 1; i >= 0; i--) { + if (region->beg[nums[i]] != ONIG_REGION_NOTPOS) + return nums[i]; + } + } + return nums[n - 1]; + } +} + +#else /* USE_NAMED_GROUP */ + +extern int +onig_name_to_group_numbers(regex_t* reg, const UChar* name, + const UChar* name_end, int** nums) +{ + return ONIG_NO_SUPPORT_CONFIG; +} + +extern int 
+onig_name_to_backref_number(regex_t* reg, const UChar* name, + const UChar* name_end, OnigRegion* region) +{ + return ONIG_NO_SUPPORT_CONFIG; +} + +extern int +onig_foreach_name(regex_t* reg, + int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), + void* arg) +{ + return ONIG_NO_SUPPORT_CONFIG; +} + +extern int +onig_number_of_names(regex_t* reg) +{ + return 0; +} +#endif /* else USE_NAMED_GROUP */ + + +#define INIT_SCANENV_MEMNODES_ALLOC_SIZE 16 + +static void +scan_env_clear(ScanEnv* env) +{ + int i; + + BIT_STATUS_CLEAR(env->capture_history); + BIT_STATUS_CLEAR(env->bt_mem_start); + BIT_STATUS_CLEAR(env->bt_mem_end); + BIT_STATUS_CLEAR(env->backrefed_mem); + env->error = (UChar* )NULL; + env->error_end = (UChar* )NULL; + env->num_call = 0; + env->num_mem = 0; +#ifdef USE_NAMED_GROUP + env->num_named = 0; +#endif + env->mem_alloc = 0; + env->mem_nodes_dynamic = (Node** )NULL; + + for (i = 0; i < SCANENV_MEMNODES_SIZE; i++) + env->mem_nodes_static[i] = NULL_NODE; +} + +static int +scan_env_add_mem_entry(ScanEnv* env) +{ + int i, need, alloc; + Node** p; + + need = env->num_mem + 1; + if (need >= SCANENV_MEMNODES_SIZE) { + if (env->mem_alloc <= need) { + if (IS_NULL(env->mem_nodes_dynamic)) { + alloc = INIT_SCANENV_MEMNODES_ALLOC_SIZE; + p = (Node** )xmalloc(sizeof(Node*) * alloc); + xmemcpy(p, env->mem_nodes_static, + sizeof(Node*) * SCANENV_MEMNODES_SIZE); + } + else { + alloc = env->mem_alloc * 2; + p = (Node** )xrealloc(env->mem_nodes_dynamic, sizeof(Node*) * alloc); + } + CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY); + + for (i = env->num_mem + 1; i < alloc; i++) + p[i] = NULL_NODE; + + env->mem_nodes_dynamic = p; + env->mem_alloc = alloc; + } + } + + env->num_mem++; + return env->num_mem; +} + +static int +scan_env_set_mem_node(ScanEnv* env, int num, Node* node) +{ + if (env->num_mem >= num) + SCANENV_MEM_NODES(env)[num] = node; + else + return ONIGERR_PARSER_BUG; + return 0; +} + + +#ifdef USE_RECYCLE_NODE +typedef struct _FreeNode { + struct 
_FreeNode* next; +} FreeNode; + +static FreeNode* FreeNodeList = (FreeNode* )NULL; +#endif + +extern void +onig_node_free(Node* node) +{ + start: + if (IS_NULL(node)) return ; + + switch (NTYPE(node)) { + case N_STRING: + if (IS_NOT_NULL(NSTRING(node).s) && NSTRING(node).s != NSTRING(node).buf) { + xfree(NSTRING(node).s); + } + break; + + case N_LIST: + case N_ALT: + onig_node_free(NCONS(node).left); + /* onig_node_free(NCONS(node).right); */ + { + Node* next_node = NCONS(node).right; + +#ifdef USE_RECYCLE_NODE + { + FreeNode* n = (FreeNode* )node; + + THREAD_ATOMIC_START; + n->next = FreeNodeList; + FreeNodeList = n; + THREAD_ATOMIC_END; + } +#else + xfree(node); +#endif + + node = next_node; + goto start; + } + break; + + case N_CCLASS: + { + CClassNode* cc = &(NCCLASS(node)); + + if (IS_CCLASS_SHARE(cc)) + return ; + + if (cc->mbuf) + bbuf_free(cc->mbuf); + } + break; + + case N_QUALIFIER: + if (NQUALIFIER(node).target) + onig_node_free(NQUALIFIER(node).target); + break; + + case N_EFFECT: + if (NEFFECT(node).target) + onig_node_free(NEFFECT(node).target); + break; + + case N_BACKREF: + if (IS_NOT_NULL(NBACKREF(node).back_dynamic)) + xfree(NBACKREF(node).back_dynamic); + break; + + case N_ANCHOR: + if (NANCHOR(node).target) + onig_node_free(NANCHOR(node).target); + break; + } + +#ifdef USE_RECYCLE_NODE + { + FreeNode* n = (FreeNode* )node; + + THREAD_ATOMIC_START; + n->next = FreeNodeList; + FreeNodeList = n; + THREAD_ATOMIC_END; + } +#else + xfree(node); +#endif +} + +#ifdef USE_RECYCLE_NODE +extern int +onig_free_node_list() +{ + FreeNode* n; + + THREAD_ATOMIC_START; + while (FreeNodeList) { + n = FreeNodeList; + FreeNodeList = FreeNodeList->next; + xfree(n); + } + THREAD_ATOMIC_END; + return 0; +} +#endif + +static Node* +node_new() +{ + Node* node; + +#ifdef USE_RECYCLE_NODE + if (IS_NOT_NULL(FreeNodeList)) { + THREAD_ATOMIC_START; + node = (Node* )FreeNodeList; + FreeNodeList = FreeNodeList->next; + THREAD_ATOMIC_END; + return node; + } +#endif + + node = 
(Node* )xmalloc(sizeof(Node)); + return node; +} + + +static void +initialize_cclass(CClassNode* cc) +{ + BITSET_CLEAR(cc->bs); + cc->flags = 0; + cc->mbuf = NULL; +} + +static Node* +node_new_cclass() +{ + Node* node = node_new(); + CHECK_NULL_RETURN(node); + node->type = N_CCLASS; + + initialize_cclass(&(NCCLASS(node))); + return node; +} + +extern Node* +node_new_cclass_by_codepoint_range(int not, + OnigCodePoint sbr[], OnigCodePoint mbr[]) +{ + CClassNode* cc; + int n, i, j; + + Node* node = node_new(); + CHECK_NULL_RETURN(node); + node->type = N_CCLASS; + + cc = &(NCCLASS(node)); + cc->flags = 0; + if (not != 0) CCLASS_SET_NOT(cc); + + BITSET_CLEAR(cc->bs); + if (IS_NOT_NULL(sbr)) { + n = ONIGENC_CODE_RANGE_NUM(sbr); + for (i = 0; i < n; i++) { + for (j = ONIGENC_CODE_RANGE_FROM(sbr, i); + j <= (int )ONIGENC_CODE_RANGE_TO(sbr, i); j++) { + BITSET_SET_BIT(cc->bs, j); + } + } + } + + if (IS_NULL(mbr)) { + is_null: + cc->mbuf = NULL; + } + else { + BBuf* bbuf; + + n = ONIGENC_CODE_RANGE_NUM(mbr); + if (n == 0) goto is_null; + + bbuf = (BBuf* )xmalloc(sizeof(BBuf)); + CHECK_NULL_RETURN_VAL(bbuf, NULL); + bbuf->alloc = n + 1; + bbuf->used = n + 1; + bbuf->p = (UChar* )((void* )mbr); + + cc->mbuf = bbuf; + } + + return node; +} + +static Node* +node_new_ctype(int type) +{ + Node* node = node_new(); + CHECK_NULL_RETURN(node); + node->type = N_CTYPE; + NCTYPE(node).type = type; + return node; +} + +static Node* +node_new_anychar() +{ + Node* node = node_new(); + CHECK_NULL_RETURN(node); + node->type = N_ANYCHAR; + return node; +} + +static Node* +node_new_list(Node* left, Node* right) +{ + Node* node = node_new(); + CHECK_NULL_RETURN(node); + node->type = N_LIST; + NCONS(node).left = left; + NCONS(node).right = right; + return node; +} + +extern Node* +onig_node_new_list(Node* left, Node* right) +{ + return node_new_list(left, right); +} + +static Node* +node_new_alt(Node* left, Node* right) +{ + Node* node = node_new(); + CHECK_NULL_RETURN(node); + node->type = 
N_ALT; + NCONS(node).left = left; + NCONS(node).right = right; + return node; +} + +extern Node* +onig_node_new_anchor(int type) +{ + Node* node = node_new(); + CHECK_NULL_RETURN(node); + node->type = N_ANCHOR; + NANCHOR(node).type = type; + NANCHOR(node).target = NULL; + NANCHOR(node).char_len = -1; + return node; +} + +static Node* +node_new_backref(int back_num, int* backrefs, int by_name, ScanEnv* env) +{ + int i; + Node* node = node_new(); + + CHECK_NULL_RETURN(node); + node->type = N_BACKREF; + NBACKREF(node).state = 0; + NBACKREF(node).back_num = back_num; + NBACKREF(node).back_dynamic = (int* )NULL; + if (by_name != 0) + NBACKREF(node).state |= NST_NAME_REF; + + for (i = 0; i < back_num; i++) { + if (backrefs[i] <= env->num_mem && + IS_NULL(SCANENV_MEM_NODES(env)[backrefs[i]])) { + NBACKREF(node).state |= NST_RECURSION; /* /...(\1).../ */ + break; + } + } + + if (back_num <= NODE_BACKREFS_SIZE) { + for (i = 0; i < back_num; i++) + NBACKREF(node).back_static[i] = backrefs[i]; + } + else { + int* p = (int* )xmalloc(sizeof(int) * back_num); + if (IS_NULL(p)) { + onig_node_free(node); + return NULL; + } + NBACKREF(node).back_dynamic = p; + for (i = 0; i < back_num; i++) + p[i] = backrefs[i]; + } + return node; +} + +#ifdef USE_SUBEXP_CALL +static Node* +node_new_call(UChar* name, UChar* name_end) +{ + Node* node = node_new(); + CHECK_NULL_RETURN(node); + + node->type = N_CALL; + NCALL(node).state = 0; + NCALL(node).ref_num = CALLNODE_REFNUM_UNDEF; + NCALL(node).target = NULL_NODE; + NCALL(node).name = name; + NCALL(node).name_end = name_end; + return node; +} +#endif + +static Node* +node_new_qualifier(int lower, int upper, int by_number) +{ + Node* node = node_new(); + CHECK_NULL_RETURN(node); + node->type = N_QUALIFIER; + NQUALIFIER(node).state = 0; + NQUALIFIER(node).target = NULL; + NQUALIFIER(node).lower = lower; + NQUALIFIER(node).upper = upper; + NQUALIFIER(node).greedy = 1; + NQUALIFIER(node).by_number = by_number; + NQUALIFIER(node).target_empty_info = 
NQ_TARGET_ISNOT_EMPTY; + NQUALIFIER(node).head_exact = NULL_NODE; + NQUALIFIER(node).next_head_exact = NULL_NODE; + NQUALIFIER(node).is_refered = 0; + return node; +} + +static Node* +node_new_effect(int type) +{ + Node* node = node_new(); + CHECK_NULL_RETURN(node); + node->type = N_EFFECT; + NEFFECT(node).type = type; + NEFFECT(node).state = 0; + NEFFECT(node).regnum = 0; + NEFFECT(node).option = 0; + NEFFECT(node).target = NULL; + NEFFECT(node).call_addr = -1; + NEFFECT(node).opt_count = 0; + return node; +} + +extern Node* +onig_node_new_effect(int type) +{ + return node_new_effect(type); +} + +static Node* +node_new_effect_memory(OnigOptionType option, int is_named) +{ + Node* node = node_new_effect(EFFECT_MEMORY); + CHECK_NULL_RETURN(node); + if (is_named != 0) + SET_EFFECT_STATUS(node, NST_NAMED_GROUP); + +#ifdef USE_SUBEXP_CALL + NEFFECT(node).option = option; +#endif + return node; +} + +static Node* +node_new_option(OnigOptionType option) +{ + Node* node = node_new_effect(EFFECT_OPTION); + CHECK_NULL_RETURN(node); + NEFFECT(node).option = option; + return node; +} + +extern int +onig_node_str_cat(Node* node, const UChar* s, const UChar* end) +{ + int addlen = end - s; + + if (addlen > 0) { + int len = NSTRING(node).end - NSTRING(node).s; + + if (NSTRING(node).capa > 0 || (len + addlen > NODE_STR_BUF_SIZE - 1)) { + UChar* p; + int capa = len + addlen + NODE_STR_MARGIN; + + if (capa <= NSTRING(node).capa) { + k_strcpy(NSTRING(node).s + len, s, end); + } + else { + if (NSTRING(node).s == NSTRING(node).buf) + p = strcat_capa_from_static(NSTRING(node).s, NSTRING(node).end, + s, end, capa); + else + p = k_strcat_capa(NSTRING(node).s, NSTRING(node).end, s, end, capa); + + CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY); + NSTRING(node).s = p; + NSTRING(node).capa = capa; + } + } + else { + k_strcpy(NSTRING(node).s + len, s, end); + } + NSTRING(node).end = NSTRING(node).s + len + addlen; + } + + return 0; +} + +static int +node_str_cat_char(Node* node, UChar c) +{ + 
UChar s[1]; + + s[0] = c; + return onig_node_str_cat(node, s, s + 1); +} + +extern void +onig_node_conv_to_str_node(Node* node, int flag) +{ + node->type = N_STRING; + + NSTRING(node).flag = flag; + NSTRING(node).capa = 0; + NSTRING(node).s = NSTRING(node).buf; + NSTRING(node).end = NSTRING(node).buf; +} + +extern void +onig_node_str_clear(Node* node) +{ + if (NSTRING(node).capa != 0 && + IS_NOT_NULL(NSTRING(node).s) && NSTRING(node).s != NSTRING(node).buf) { + xfree(NSTRING(node).s); + } + + NSTRING(node).capa = 0; + NSTRING(node).flag = 0; + NSTRING(node).s = NSTRING(node).buf; + NSTRING(node).end = NSTRING(node).buf; +} + +static Node* +node_new_str(const UChar* s, const UChar* end) +{ + Node* node = node_new(); + CHECK_NULL_RETURN(node); + + node->type = N_STRING; + NSTRING(node).capa = 0; + NSTRING(node).flag = 0; + NSTRING(node).s = NSTRING(node).buf; + NSTRING(node).end = NSTRING(node).buf; + if (onig_node_str_cat(node, s, end)) { + onig_node_free(node); + return NULL; + } + return node; +} + +extern Node* +onig_node_new_str(const UChar* s, const UChar* end) +{ + return node_new_str(s, end); +} + +static Node* +node_new_str_raw(UChar* s, UChar* end) +{ + Node* node = node_new_str(s, end); + NSTRING_SET_RAW(node); + return node; +} + +static Node* +node_new_empty() +{ + return node_new_str(NULL, NULL); +} + +static Node* +node_new_str_raw_char(UChar c) +{ + UChar p[1]; + + p[0] = c; + return node_new_str_raw(p, p + 1); +} + +static Node* +str_node_split_last_char(StrNode* sn, OnigEncoding enc) +{ + const UChar *p; + Node* n = NULL_NODE; + + if (sn->end > sn->s) { + p = onigenc_get_prev_char_head(enc, sn->s, sn->end); + if (p && p > sn->s) { /* can be splitted. */ + n = node_new_str(p, sn->end); + if ((sn->flag & NSTR_RAW) != 0) + NSTRING_SET_RAW(n); + sn->end = (UChar* )p; + } + } + return n; +} + +static int +str_node_can_be_split(StrNode* sn, OnigEncoding enc) +{ + if (sn->end > sn->s) { + return ((enc_len(enc, sn->s) < sn->end - sn->s) ? 
1 : 0); + } + return 0; +} + +extern int +onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc) +{ + unsigned int num, val; + OnigCodePoint c; + UChar* p = *src; + PFETCH_READY; + + num = 0; + while (!PEND) { + PFETCH(c); + if (ONIGENC_IS_CODE_DIGIT(enc, c)) { + val = (unsigned int )DIGITVAL(c); + if ((INT_MAX_LIMIT - val) / 10UL < num) + return -1; /* overflow */ + + num = num * 10 + val; + } + else { + PUNFETCH; + break; + } + } + *src = p; + return num; +} + +static int +scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int maxlen, + OnigEncoding enc) +{ + OnigCodePoint c; + unsigned int num, val; + UChar* p = *src; + PFETCH_READY; + + num = 0; + while (!PEND && maxlen-- != 0) { + PFETCH(c); + if (ONIGENC_IS_CODE_XDIGIT(enc, c)) { + val = (unsigned int )XDIGITVAL(enc,c); + if ((INT_MAX_LIMIT - val) / 16UL < num) + return -1; /* overflow */ + + num = (num << 4) + XDIGITVAL(enc,c); + } + else { + PUNFETCH; + break; + } + } + *src = p; + return num; +} + +static int +scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen, + OnigEncoding enc) +{ + OnigCodePoint c; + unsigned int num, val; + UChar* p = *src; + PFETCH_READY; + + num = 0; + while (!PEND && maxlen-- != 0) { + PFETCH(c); + if (ONIGENC_IS_CODE_DIGIT(enc, c) && c < '8') { + val = ODIGITVAL(c); + if ((INT_MAX_LIMIT - val) / 8UL < num) + return -1; /* overflow */ + + num = (num << 3) + val; + } + else { + PUNFETCH; + break; + } + } + *src = p; + return num; +} + + +#define BBUF_WRITE_CODE_POINT(bbuf,pos,code) \ + BBUF_WRITE(bbuf, pos, &(code), SIZE_CODE_POINT) + +/* data format: + [n][from-1][to-1][from-2][to-2] ... 
[from-n][to-n] + (all data size is OnigCodePoint) + */ +static int +new_code_range(BBuf** pbuf) +{ +#define INIT_MULTI_BYTE_RANGE_SIZE (SIZE_CODE_POINT * 5) + int r; + OnigCodePoint n; + BBuf* bbuf; + + bbuf = *pbuf = (BBuf* )xmalloc(sizeof(BBuf)); + CHECK_NULL_RETURN_VAL(*pbuf, ONIGERR_MEMORY); + r = BBUF_INIT(*pbuf, INIT_MULTI_BYTE_RANGE_SIZE); + if (r) return r; + + n = 0; + BBUF_WRITE_CODE_POINT(bbuf, 0, n); + return 0; +} + +static int +add_code_range_to_buf(BBuf** pbuf, OnigCodePoint from, OnigCodePoint to) +{ + int r, inc_n, pos; + int low, high, bound, x; + OnigCodePoint n, *data; + BBuf* bbuf; + + if (from > to) { + n = from; from = to; to = n; + } + + if (IS_NULL(*pbuf)) { + r = new_code_range(pbuf); + if (r) return r; + bbuf = *pbuf; + n = 0; + } + else { + bbuf = *pbuf; + GET_CODE_POINT(n, bbuf->p); + } + data = (OnigCodePoint* )(bbuf->p); + data++; + + for (low = 0, bound = n; low < bound; ) { + x = (low + bound) >> 1; + if (from > data[x*2 + 1]) + low = x + 1; + else + bound = x; + } + + for (high = low, bound = n; high < bound; ) { + x = (high + bound) >> 1; + if (to >= data[x*2] - 1) + high = x + 1; + else + bound = x; + } + + inc_n = low + 1 - high; + if (n + inc_n > ONIG_MAX_MULTI_BYTE_RANGES_NUM) + return ONIGERR_TOO_MANY_MULTI_BYTE_RANGES; + + if (inc_n != 1) { + if (from > data[low*2]) + from = data[low*2]; + if (to < data[(high - 1)*2 + 1]) + to = data[(high - 1)*2 + 1]; + } + + if (inc_n != 0 && (OnigCodePoint )high < n) { + int from_pos = SIZE_CODE_POINT * (1 + high * 2); + int to_pos = SIZE_CODE_POINT * (1 + (low + 1) * 2); + int size = (n - high) * 2 * SIZE_CODE_POINT; + + if (inc_n > 0) { + BBUF_MOVE_RIGHT(bbuf, from_pos, to_pos, size); + } + else { + BBUF_MOVE_LEFT_REDUCE(bbuf, from_pos, to_pos); + } + } + + pos = SIZE_CODE_POINT * (1 + low * 2); + BBUF_ENSURE_SIZE(bbuf, pos + SIZE_CODE_POINT * 2); + BBUF_WRITE_CODE_POINT(bbuf, pos, from); + BBUF_WRITE_CODE_POINT(bbuf, pos + SIZE_CODE_POINT, to); + n += inc_n; + 
BBUF_WRITE_CODE_POINT(bbuf, 0, n); + + return 0; +} + +static int +add_code_range(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to) +{ + if (from > to) { + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC)) + return 0; + else + return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS; + } + + return add_code_range_to_buf(pbuf, from, to); +} + +static int +not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf) +{ + int r, i, n; + OnigCodePoint pre, from, *data, to = 0; + + *pbuf = (BBuf* )NULL; + if (IS_NULL(bbuf)) { + set_all: + return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf); + } + + data = (OnigCodePoint* )(bbuf->p); + GET_CODE_POINT(n, data); + data++; + if (n <= 0) goto set_all; + + r = 0; + pre = MBCODE_START_POS(enc); + for (i = 0; i < n; i++) { + from = data[i*2]; + to = data[i*2+1]; + if (pre <= from - 1) { + r = add_code_range_to_buf(pbuf, pre, from - 1); + if (r != 0) return r; + } + if (to == ~((OnigCodePoint )0)) break; + pre = to + 1; + } + if (to < ~((OnigCodePoint )0)) { + r = add_code_range_to_buf(pbuf, to + 1, ~((OnigCodePoint )0)); + } + return r; +} + +#define SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2) do {\ + BBuf *tbuf; \ + int tnot; \ + tnot = not1; not1 = not2; not2 = tnot; \ + tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf; \ +} while (0) + +static int +or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1, + BBuf* bbuf2, int not2, BBuf** pbuf) +{ + int r; + OnigCodePoint i, n1, *data1; + OnigCodePoint from, to; + + *pbuf = (BBuf* )NULL; + if (IS_NULL(bbuf1) && IS_NULL(bbuf2)) { + if (not1 != 0 || not2 != 0) + return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf); + return 0; + } + + r = 0; + if (IS_NULL(bbuf2)) + SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2); + + if (IS_NULL(bbuf1)) { + if (not1 != 0) { + return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf); + } + else { + if (not2 == 0) { + return bbuf_clone(pbuf, bbuf2); + } + else { + return not_code_range_buf(enc, bbuf2, pbuf); + } + } + } + + if (not1 != 0) + SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2); + 
+ data1 = (OnigCodePoint* )(bbuf1->p); + GET_CODE_POINT(n1, data1); + data1++; + + if (not2 == 0 && not1 == 0) { /* 1 OR 2 */ + r = bbuf_clone(pbuf, bbuf2); + } + else if (not1 == 0) { /* 1 OR (not 2) */ + r = not_code_range_buf(enc, bbuf2, pbuf); + } + if (r != 0) return r; + + for (i = 0; i < n1; i++) { + from = data1[i*2]; + to = data1[i*2+1]; + r = add_code_range_to_buf(pbuf, from, to); + if (r != 0) return r; + } + return 0; +} + +static int +and_code_range1(BBuf** pbuf, OnigCodePoint from1, OnigCodePoint to1, + OnigCodePoint* data, int n) +{ + int i, r; + OnigCodePoint from2, to2; + + for (i = 0; i < n; i++) { + from2 = data[i*2]; + to2 = data[i*2+1]; + if (from2 < from1) { + if (to2 < from1) continue; + else { + from1 = to2 + 1; + } + } + else if (from2 <= to1) { + if (to2 < to1) { + if (from1 <= from2 - 1) { + r = add_code_range_to_buf(pbuf, from1, from2-1); + if (r != 0) return r; + } + from1 = to2 + 1; + } + else { + to1 = from2 - 1; + } + } + else { + from1 = from2; + } + if (from1 > to1) break; + } + if (from1 <= to1) { + r = add_code_range_to_buf(pbuf, from1, to1); + if (r != 0) return r; + } + return 0; +} + +static int +and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf) +{ + int r; + OnigCodePoint i, j, n1, n2, *data1, *data2; + OnigCodePoint from, to, from1, to1, from2, to2; + + *pbuf = (BBuf* )NULL; + if (IS_NULL(bbuf1)) { + if (not1 != 0 && IS_NOT_NULL(bbuf2)) /* not1 != 0 -> not2 == 0 */ + return bbuf_clone(pbuf, bbuf2); + return 0; + } + else if (IS_NULL(bbuf2)) { + if (not2 != 0) + return bbuf_clone(pbuf, bbuf1); + return 0; + } + + if (not1 != 0) + SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2); + + data1 = (OnigCodePoint* )(bbuf1->p); + data2 = (OnigCodePoint* )(bbuf2->p); + GET_CODE_POINT(n1, data1); + GET_CODE_POINT(n2, data2); + data1++; + data2++; + + if (not2 == 0 && not1 == 0) { /* 1 AND 2 */ + for (i = 0; i < n1; i++) { + from1 = data1[i*2]; + to1 = data1[i*2+1]; + for (j = 0; j < n2; j++) { + from2 = data2[j*2]; + 
to2 = data2[j*2+1]; + if (from2 > to1) break; + if (to2 < from1) continue; + from = MAX(from1, from2); + to = MIN(to1, to2); + r = add_code_range_to_buf(pbuf, from, to); + if (r != 0) return r; + } + } + } + else if (not1 == 0) { /* 1 AND (not 2) */ + for (i = 0; i < n1; i++) { + from1 = data1[i*2]; + to1 = data1[i*2+1]; + r = and_code_range1(pbuf, from1, to1, data2, n2); + if (r != 0) return r; + } + } + + return 0; +} + +static int +clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc) +{ + BBuf *tbuf; + int r; + + if (IS_CCLASS_NOT(cc)) { + bitset_invert(cc->bs); + + if (! ONIGENC_IS_SINGLEBYTE(enc)) { + r = not_code_range_buf(enc, cc->mbuf, &tbuf); + if (r != 0) return r; + + bbuf_free(cc->mbuf); + cc->mbuf = tbuf; + } + + CCLASS_CLEAR_NOT(cc); + } + + return 0; +} + +static int +and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc) +{ + int r, not1, not2; + BBuf *buf1, *buf2, *pbuf; + BitSetRef bsr1, bsr2; + BitSet bs1, bs2; + + not1 = IS_CCLASS_NOT(dest); + bsr1 = dest->bs; + buf1 = dest->mbuf; + not2 = IS_CCLASS_NOT(cc); + bsr2 = cc->bs; + buf2 = cc->mbuf; + + if (not1 != 0) { + bitset_invert_to(bsr1, bs1); + bsr1 = bs1; + } + if (not2 != 0) { + bitset_invert_to(bsr2, bs2); + bsr2 = bs2; + } + bitset_and(bsr1, bsr2); + if (bsr1 != dest->bs) { + bitset_copy(dest->bs, bsr1); + bsr1 = dest->bs; + } + if (not1 != 0) { + bitset_invert(dest->bs); + } + + if (! 
ONIGENC_IS_SINGLEBYTE(enc)) { + if (not1 != 0 && not2 != 0) { + r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf); + } + else { + r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf); + if (r == 0 && not1 != 0) { + BBuf *tbuf; + r = not_code_range_buf(enc, pbuf, &tbuf); + if (r != 0) { + bbuf_free(pbuf); + return r; + } + bbuf_free(pbuf); + pbuf = tbuf; + } + } + if (r != 0) return r; + + dest->mbuf = pbuf; + bbuf_free(buf1); + return r; + } + return 0; +} + +static int +or_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc) +{ + int r, not1, not2; + BBuf *buf1, *buf2, *pbuf; + BitSetRef bsr1, bsr2; + BitSet bs1, bs2; + + not1 = IS_CCLASS_NOT(dest); + bsr1 = dest->bs; + buf1 = dest->mbuf; + not2 = IS_CCLASS_NOT(cc); + bsr2 = cc->bs; + buf2 = cc->mbuf; + + if (not1 != 0) { + bitset_invert_to(bsr1, bs1); + bsr1 = bs1; + } + if (not2 != 0) { + bitset_invert_to(bsr2, bs2); + bsr2 = bs2; + } + bitset_or(bsr1, bsr2); + if (bsr1 != dest->bs) { + bitset_copy(dest->bs, bsr1); + bsr1 = dest->bs; + } + if (not1 != 0) { + bitset_invert(dest->bs); + } + + if (! 
ONIGENC_IS_SINGLEBYTE(enc)) { + if (not1 != 0 && not2 != 0) { + r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf); + } + else { + r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf); + if (r == 0 && not1 != 0) { + BBuf *tbuf; + r = not_code_range_buf(enc, pbuf, &tbuf); + if (r != 0) { + bbuf_free(pbuf); + return r; + } + bbuf_free(pbuf); + pbuf = tbuf; + } + } + if (r != 0) return r; + + dest->mbuf = pbuf; + bbuf_free(buf1); + return r; + } + else + return 0; +} + +static int +conv_backslash_value(int c, ScanEnv* env) +{ + if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_CONTROL_CHARS)) { + switch (c) { + case 'n': return '\n'; + case 't': return '\t'; + case 'r': return '\r'; + case 'f': return '\f'; + case 'a': return '\007'; + case 'b': return '\010'; + case 'e': return '\033'; + case 'v': + if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_V_VTAB)) + return '\v'; + break; + + default: + break; + } + } + return c; +} + +static int +is_invalid_qualifier_target(Node* node) +{ + switch (NTYPE(node)) { + case N_ANCHOR: + return 1; + break; + + case N_EFFECT: + if (NEFFECT(node).type == EFFECT_OPTION) + return is_invalid_qualifier_target(NEFFECT(node).target); + break; + + case N_LIST: /* ex. (?:\G\A)* */ + do { + if (! is_invalid_qualifier_target(NCONS(node).left)) return 0; + } while (IS_NOT_NULL(node = NCONS(node).right)); + return 0; + break; + + case N_ALT: /* ex. 
(?:abc|\A)* */ + do { + if (is_invalid_qualifier_target(NCONS(node).left)) return 1; + } while (IS_NOT_NULL(node = NCONS(node).right)); + break; + + default: + break; + } + return 0; +} + +/* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */ +static int +popular_qualifier_num(QualifierNode* qf) +{ + if (qf->greedy) { + if (qf->lower == 0) { + if (qf->upper == 1) return 0; + else if (IS_REPEAT_INFINITE(qf->upper)) return 1; + } + else if (qf->lower == 1) { + if (IS_REPEAT_INFINITE(qf->upper)) return 2; + } + } + else { + if (qf->lower == 0) { + if (qf->upper == 1) return 3; + else if (IS_REPEAT_INFINITE(qf->upper)) return 4; + } + else if (qf->lower == 1) { + if (IS_REPEAT_INFINITE(qf->upper)) return 5; + } + } + return -1; +} + + +enum ReduceType { + RQ_ASIS = 0, /* as is */ + RQ_DEL = 1, /* delete parent */ + RQ_A, /* to '*' */ + RQ_AQ, /* to '*?' */ + RQ_QQ, /* to '??' */ + RQ_P_QQ, /* to '+)??' */ + RQ_PQ_Q, /* to '+?)?' */ +}; + +static enum ReduceType ReduceTypeTable[6][6] = { + {RQ_DEL, RQ_A, RQ_A, RQ_QQ, RQ_AQ, RQ_ASIS}, /* '?' */ + {RQ_DEL, RQ_DEL, RQ_DEL, RQ_P_QQ, RQ_P_QQ, RQ_DEL}, /* '*' */ + {RQ_A, RQ_A, RQ_DEL, RQ_ASIS, RQ_P_QQ, RQ_DEL}, /* '+' */ + {RQ_DEL, RQ_AQ, RQ_AQ, RQ_DEL, RQ_AQ, RQ_AQ}, /* '??' */ + {RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL}, /* '*?' */ + {RQ_ASIS, RQ_PQ_Q, RQ_DEL, RQ_AQ, RQ_AQ, RQ_DEL} /* '+?' 
*/ +}; + +extern void +onig_reduce_nested_qualifier(Node* pnode, Node* cnode) +{ + int pnum, cnum; + QualifierNode *p, *c; + + p = &(NQUALIFIER(pnode)); + c = &(NQUALIFIER(cnode)); + pnum = popular_qualifier_num(p); + cnum = popular_qualifier_num(c); + + switch(ReduceTypeTable[cnum][pnum]) { + case RQ_DEL: + *p = *c; + break; + case RQ_A: + p->target = c->target; + p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 1; + break; + case RQ_AQ: + p->target = c->target; + p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 0; + break; + case RQ_QQ: + p->target = c->target; + p->lower = 0; p->upper = 1; p->greedy = 0; + break; + case RQ_P_QQ: + p->target = cnode; + p->lower = 0; p->upper = 1; p->greedy = 0; + c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 1; + return ; + break; + case RQ_PQ_Q: + p->target = cnode; + p->lower = 0; p->upper = 1; p->greedy = 1; + c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 0; + return ; + break; + case RQ_ASIS: + p->target = cnode; + return ; + break; + } + + c->target = NULL_NODE; + onig_node_free(cnode); +} + + +enum TokenSyms { + TK_EOT = 0, /* end of token */ + TK_RAW_BYTE = 1, + TK_CHAR, + TK_STRING, + TK_CODE_POINT, + TK_ANYCHAR, + TK_CHAR_TYPE, + TK_BACKREF, + TK_CALL, + TK_ANCHOR, + TK_OP_REPEAT, + TK_INTERVAL, + TK_ANYCHAR_ANYTIME, /* SQL '%' == .* */ + TK_ALT, + TK_SUBEXP_OPEN, + TK_SUBEXP_CLOSE, + TK_CC_OPEN, + TK_QUOTE_OPEN, + TK_CHAR_PROPERTY, /* \p{...}, \P{...} */ + /* in cc */ + TK_CC_CLOSE, + TK_CC_RANGE, + TK_POSIX_BRACKET_OPEN, + TK_CC_AND, /* && */ + TK_CC_CC_OPEN /* [ */ +}; + +typedef struct { + enum TokenSyms type; + int escaped; + int base; /* is number: 8, 16 (used in [....]) */ + UChar* backp; + union { + UChar* s; + int c; + OnigCodePoint code; + int anchor; + int subtype; + struct { + int lower; + int upper; + int greedy; + int possessive; + } repeat; + struct { + int num; + int ref1; + int* refs; + int by_name; + } backref; + struct { + UChar* name; + UChar* name_end; + } call; + struct { + 
int not; + } prop; + } u; +} OnigToken; + + +static int +fetch_range_qualifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env) +{ + int low, up, syn_allow, non_low = 0; + int r = 0; + OnigCodePoint c; + OnigEncoding enc = env->enc; + UChar* p = *src; + PFETCH_READY; + + syn_allow = IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INVALID_INTERVAL); + + if (PEND) { + if (syn_allow) + return 1; /* "....{" : OK! */ + else + return ONIGERR_END_PATTERN_AT_LEFT_BRACE; /* "....{" syntax error */ + } + + if (! syn_allow) { + c = PPEEK; + if (c == ')' || c == '(' || c == '|') { + return ONIGERR_END_PATTERN_AT_LEFT_BRACE; + } + } + + low = onig_scan_unsigned_number(&p, end, env->enc); + if (low < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE; + if (low > ONIG_MAX_REPEAT_NUM) + return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE; + + if (p == *src) { /* can't read low */ + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV)) { + /* allow {,n} as {0,n} */ + low = 0; + non_low = 1; + } + else + goto invalid; + } + + if (PEND) goto invalid; + PFETCH(c); + if (c == ',') { + UChar* prev = p; + up = onig_scan_unsigned_number(&p, end, env->enc); + if (up < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE; + if (up > ONIG_MAX_REPEAT_NUM) + return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE; + + if (p == prev) { + if (non_low != 0) + goto invalid; + up = REPEAT_INFINITE; /* {n,} : {n,infinite} */ + } + } + else { + if (non_low != 0) + goto invalid; + + PUNFETCH; + up = low; /* {n} : exact n times */ + r = 2; /* fixed */ + } + + if (PEND) goto invalid; + PFETCH(c); + if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) { + if (c != MC_ESC(enc)) goto invalid; + PFETCH(c); + } + if (c != '}') goto invalid; + + if (!IS_REPEAT_INFINITE(up) && low > up) { + return ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE; + } + + tok->type = TK_INTERVAL; + tok->u.repeat.lower = low; + tok->u.repeat.upper = up; + *src = p; + return r; /* 0: normal {n,m}, 2: fixed {n} */ + + 
invalid: + if (syn_allow) + return 1; /* OK */ + else + return ONIGERR_INVALID_REPEAT_RANGE_PATTERN; +} + +/* \M-, \C-, \c, or \... */ +static int +fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env) +{ + int v; + OnigCodePoint c; + OnigEncoding enc = env->enc; + UChar* p = *src; + PFETCH_READY; + + if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE; + + PFETCH(c); + switch (c) { + case 'M': + if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META)) { + if (PEND) return ONIGERR_END_PATTERN_AT_META; + PFETCH(c); + if (c != '-') return ONIGERR_META_CODE_SYNTAX; + if (PEND) return ONIGERR_END_PATTERN_AT_META; + PFETCH(c); + if (c == MC_ESC(enc)) { + v = fetch_escaped_value(&p, end, env); + if (v < 0) return v; + c = (OnigCodePoint )v; + } + c = ((c & 0xff) | 0x80); + } + else + goto backslash; + break; + + case 'C': + if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL)) { + if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL; + PFETCH(c); + if (c != '-') return ONIGERR_CONTROL_CODE_SYNTAX; + goto control; + } + else + goto backslash; + + case 'c': + if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_C_CONTROL)) { + control: + if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL; + PFETCH(c); + if (c == MC_ESC(enc)) { + v = fetch_escaped_value(&p, end, env); + if (v < 0) return v; + c = (OnigCodePoint )v; + } + else if (c == '?') + c = 0177; + else + c &= 0x9f; + break; + } + /* fall through */ + + default: + { + backslash: + c = conv_backslash_value(c, env); + } + break; + } + + *src = p; + return c; +} + +static int fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env); + +#ifdef USE_NAMED_GROUP +/* + def: 0 -> define name (don't allow number name) + 1 -> reference name (allow number name) +*/ +static int +fetch_name(UChar** src, UChar* end, UChar** rname_end, ScanEnv* env, int ref) +{ + int r, is_num; + OnigCodePoint c = 0; + OnigCodePoint first_code; + OnigEncoding enc = env->enc; + UChar *name_end; + UChar *p = *src; + 
PFETCH_READY; + + name_end = end; + r = 0; + is_num = 0; + if (PEND) { + return ONIGERR_EMPTY_GROUP_NAME; + } + else { + PFETCH(c); + first_code = c; + if (c == '>') + return ONIGERR_EMPTY_GROUP_NAME; + + if (ONIGENC_IS_CODE_DIGIT(enc, c)) { + if (ref == 1) + is_num = 1; + else { + r = ONIGERR_INVALID_GROUP_NAME; + } + } + else if (!ONIGENC_IS_CODE_WORD(enc, c)) { + r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; + } + } + + while (!PEND) { + name_end = p; + PFETCH(c); + if (c == '>' || c == ')') break; + + if (is_num == 1) { + if (! ONIGENC_IS_CODE_DIGIT(enc, c)) { + if (!ONIGENC_IS_CODE_WORD(enc, c)) + r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; + else + r = ONIGERR_INVALID_GROUP_NAME; + } + } + else { + if (!ONIGENC_IS_CODE_WORD(enc, c)) { + r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; + } + } + } + + if (c != '>') { + r = ONIGERR_INVALID_GROUP_NAME; + name_end = end; + } + else { + if (ONIGENC_IS_CODE_ASCII(first_code) && + ONIGENC_IS_CODE_UPPER(enc, first_code)) + r = ONIGERR_INVALID_GROUP_NAME; + } + + if (r == 0) { + *rname_end = name_end; + *src = p; + return 0; + } + else { + onig_scan_env_set_error_string(env, r, *src, name_end); + return r; + } +} +#else +static int +fetch_name(UChar** src, UChar* end, UChar** rname_end, ScanEnv* env, int ref) +{ + int r, len; + OnigCodePoint c = 0; + UChar *name_end; + OnigEncoding enc = env->enc; + UChar *p = *src; + PFETCH_READY; + + r = 0; + while (!PEND) { + name_end = p; + if (enc_len(enc, p) > 1) + r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; + + PFETCH(c); + if (c == '>' || c == ')') break; + if (! 
ONIGENC_IS_CODE_DIGIT(enc, c)) + r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; + } + if (c != '>') { + r = ONIGERR_INVALID_GROUP_NAME; + name_end = end; + } + + if (r == 0) { + *rname_end = name_end; + *src = p; + return 0; + } + else { + err: + onig_scan_env_set_error_string(env, r, *src, name_end); + return r; + } +} +#endif + +static void +CC_ESC_WARN(ScanEnv* env, UChar *c) +{ + if (onig_warn == onig_null_warn) return ; + + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED) && + IS_SYNTAX_BV(env->syntax, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) { + char buf[WARN_BUFSIZE]; + onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc, + env->pattern, env->pattern_end, + "character class has '%s' without escape", c); + (*onig_warn)(buf); + } +} + +static void +CCEND_ESC_WARN(ScanEnv* env, UChar* c) +{ + if (onig_warn == onig_null_warn) return ; + + if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) { + char buf[WARN_BUFSIZE]; + onig_snprintf_with_pattern(buf, WARN_BUFSIZE, (env)->enc, + (env)->pattern, (env)->pattern_end, + "regular expression has '%s' without escape", c); + (*onig_warn)(buf); + } +} + +static UChar* +find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to, + UChar **next, OnigEncoding enc) +{ + int i; + OnigCodePoint x; + UChar *q; + UChar *p = from; + + while (p < to) { + x = ONIGENC_MBC_TO_CODE(enc, p, to); + q = p + enc_len(enc, p); + if (x == s[0]) { + for (i = 1; i < n && q < to; i++) { + x = ONIGENC_MBC_TO_CODE(enc, q, to); + if (x != s[i]) break; + q += enc_len(enc, q); + } + if (i >= n) { + if (IS_NOT_NULL(next)) + *next = q; + return p; + } + } + p = q; + } + return NULL_UCHARP; +} + +static int +str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to, + OnigCodePoint bad, OnigEncoding enc) +{ + int i, in_esc; + OnigCodePoint x; + UChar *q; + UChar *p = from; + + in_esc = 0; + while (p < to) { + if (in_esc) { + in_esc = 0; + p += enc_len(enc, p); + } + else { + x = ONIGENC_MBC_TO_CODE(enc, p, to); + 
q = p + enc_len(enc, p); + if (x == s[0]) { + for (i = 1; i < n && q < to; i++) { + x = ONIGENC_MBC_TO_CODE(enc, q, to); + if (x != s[i]) break; + q += enc_len(enc, q); + } + if (i >= n) return 1; + p += enc_len(enc, p); + } + else { + x = ONIGENC_MBC_TO_CODE(enc, p, to); + if (x == bad) return 0; + else if (x == MC_ESC(enc)) in_esc = 1; + p = q; + } + } + } + return 0; +} + +static int +fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) +{ + int num; + OnigCodePoint c, c2; + OnigSyntaxType* syn = env->syntax; + OnigEncoding enc = env->enc; + UChar* prev; + UChar* p = *src; + PFETCH_READY; + + if (PEND) { + tok->type = TK_EOT; + return tok->type; + } + + PFETCH(c); + tok->type = TK_CHAR; + tok->base = 0; + tok->u.c = c; + if (c == ']') { + tok->type = TK_CC_CLOSE; + } + else if (c == '-') { + tok->type = TK_CC_RANGE; + } + else if (c == MC_ESC(enc)) { + if (! IS_SYNTAX_BV(syn, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) + goto end; + + if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE; + + PFETCH(c); + tok->escaped = 1; + tok->u.c = c; + switch (c) { + case 'w': + tok->type = TK_CHAR_TYPE; + tok->u.subtype = CTYPE_WORD; + break; + case 'W': + tok->type = TK_CHAR_TYPE; + tok->u.subtype = CTYPE_NOT_WORD; + break; + case 'd': + tok->type = TK_CHAR_TYPE; + tok->u.subtype = CTYPE_DIGIT; + break; + case 'D': + tok->type = TK_CHAR_TYPE; + tok->u.subtype = CTYPE_NOT_DIGIT; + break; + case 's': + tok->type = TK_CHAR_TYPE; + tok->u.subtype = CTYPE_WHITE_SPACE; + break; + case 'S': + tok->type = TK_CHAR_TYPE; + tok->u.subtype = CTYPE_NOT_WHITE_SPACE; + break; + case 'h': + if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break; + tok->type = TK_CHAR_TYPE; + tok->u.subtype = CTYPE_XDIGIT; + break; + case 'H': + if (! 
IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break; + tok->type = TK_CHAR_TYPE; + tok->u.subtype = CTYPE_NOT_XDIGIT; + break; + + case 'p': + case 'P': + c2 = PPEEK; + if (c2 == '{' && + IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) { + PINC; + tok->type = TK_CHAR_PROPERTY; + tok->u.prop.not = (c == 'P' ? 1 : 0); + + if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) { + PFETCH(c2); + if (c2 == '^') { + tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0); + } + else + PUNFETCH; + } + } + break; + + case 'x': + if (PEND) break; + + prev = p; + if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) { + PINC; + num = scan_unsigned_hexadecimal_number(&p, end, 8, enc); + if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; + if (!PEND) { + c2 = PPEEK; + if (ONIGENC_IS_CODE_XDIGIT(enc, c2)) + return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; + } + + if (p > prev + enc_len(enc, prev) && !PEND && (PPEEK_IS('}'))) { + PINC; + tok->type = TK_CODE_POINT; + tok->base = 16; + tok->u.code = (OnigCodePoint )num; + } + else { + /* can't read nothing or invalid format */ + p = prev; + } + } + else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) { + num = scan_unsigned_hexadecimal_number(&p, end, 2, enc); + if (num < 0) return ONIGERR_TOO_BIG_NUMBER; + if (p == prev) { /* can't read nothing. */ + num = 0; /* but, it's not error */ + } + tok->type = TK_RAW_BYTE; + tok->base = 16; + tok->u.c = num; + } + break; + + case 'u': + if (PEND) break; + + prev = p; + if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) { + num = scan_unsigned_hexadecimal_number(&p, end, 4, enc); + if (num < 0) return ONIGERR_TOO_BIG_NUMBER; + if (p == prev) { /* can't read nothing. 
*/ + num = 0; /* but, it's not error */ + } + tok->type = TK_CODE_POINT; + tok->base = 16; + tok->u.code = (OnigCodePoint )num; + } + break; + + case '0': + case '1': case '2': case '3': case '4': case '5': case '6': case '7': + if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) { + PUNFETCH; + prev = p; + num = scan_unsigned_octal_number(&p, end, 3, enc); + if (num < 0) return ONIGERR_TOO_BIG_NUMBER; + if (p == prev) { /* can't read nothing. */ + num = 0; /* but, it's not error */ + } + tok->type = TK_RAW_BYTE; + tok->base = 8; + tok->u.c = num; + } + break; + + default: + PUNFETCH; + num = fetch_escaped_value(&p, end, env); + if (num < 0) return num; + if (tok->u.c != num) { + tok->u.code = (OnigCodePoint )num; + tok->type = TK_CODE_POINT; + } + break; + } + } + else if (c == '[') { + if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_POSIX_BRACKET) && (PPEEK_IS(':'))) { + OnigCodePoint send[] = { (OnigCodePoint )':', (OnigCodePoint )']' }; + tok->backp = p; /* point at '[' is readed */ + PINC; + if (str_exist_check_with_esc(send, 2, p, end, + (OnigCodePoint )']', enc)) { + tok->type = TK_POSIX_BRACKET_OPEN; + } + else { + PUNFETCH; + goto cc_in_cc; + } + } + else { + cc_in_cc: + if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP)) { + tok->type = TK_CC_CC_OPEN; + } + else { + CC_ESC_WARN(env, "["); + } + } + } + else if (c == '&') { + if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP) && + !PEND && (PPEEK_IS('&'))) { + PINC; + tok->type = TK_CC_AND; + } + } + + end: + *src = p; + return tok->type; +} + +static int +fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) +{ + int r, num; + OnigCodePoint c; + OnigEncoding enc = env->enc; + OnigSyntaxType* syn = env->syntax; + UChar* prev; + UChar* p = *src; + PFETCH_READY; + + start: + if (PEND) { + tok->type = TK_EOT; + return tok->type; + } + + tok->type = TK_STRING; + tok->base = 0; + tok->backp = p; + + PFETCH(c); + if (c == MC_ESC(enc)) { + if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE; + + tok->backp = p; + PFETCH(c); 
+ + tok->u.c = c; + tok->escaped = 1; + switch (c) { + case '*': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF)) break; + tok->type = TK_OP_REPEAT; + tok->u.repeat.lower = 0; + tok->u.repeat.upper = REPEAT_INFINITE; + goto greedy_check; + break; + + case '+': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_PLUS_ONE_INF)) break; + tok->type = TK_OP_REPEAT; + tok->u.repeat.lower = 1; + tok->u.repeat.upper = REPEAT_INFINITE; + goto greedy_check; + break; + + case '?': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_QMARK_ZERO_ONE)) break; + tok->type = TK_OP_REPEAT; + tok->u.repeat.lower = 0; + tok->u.repeat.upper = 1; + greedy_check: + if (!PEND && PPEEK_IS('?') && + IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY)) { + PFETCH(c); + tok->u.repeat.greedy = 0; + tok->u.repeat.possessive = 0; + } + else { + possessive_check: + if (!PEND && PPEEK_IS('+') && + ((IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT) && + tok->type != TK_INTERVAL) || + (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL) && + tok->type == TK_INTERVAL))) { + PFETCH(c); + tok->u.repeat.greedy = 1; + tok->u.repeat.possessive = 1; + } + else { + tok->u.repeat.greedy = 1; + tok->u.repeat.possessive = 0; + } + } + break; + + case '{': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) break; + r = fetch_range_qualifier(&p, end, tok, env); + if (r < 0) return r; /* error */ + if (r == 0) goto greedy_check; + else if (r == 2) { /* {n} */ + if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY)) + goto possessive_check; + + goto greedy_check; + } + /* r == 1 : normal char */ + break; + + case '|': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_VBAR_ALT)) break; + tok->type = TK_ALT; + break; + + case '(': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break; + tok->type = TK_SUBEXP_OPEN; + break; + + case ')': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break; + tok->type = TK_SUBEXP_CLOSE; + break; + + case 'w': + if (! 
IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break; + tok->type = TK_CHAR_TYPE; + tok->u.subtype = CTYPE_WORD; + break; + + case 'W': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break; + tok->type = TK_CHAR_TYPE; + tok->u.subtype = CTYPE_NOT_WORD; + break; + + case 'b': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break; + tok->type = TK_ANCHOR; + tok->u.anchor = ANCHOR_WORD_BOUND; + break; + + case 'B': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break; + tok->type = TK_ANCHOR; + tok->u.anchor = ANCHOR_NOT_WORD_BOUND; + break; + +#ifdef USE_WORD_BEGIN_END + case '<': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break; + tok->type = TK_ANCHOR; + tok->u.anchor = ANCHOR_WORD_BEGIN; + break; + + case '>': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break; + tok->type = TK_ANCHOR; + tok->u.anchor = ANCHOR_WORD_END; + break; +#endif + + case 's': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break; + tok->type = TK_CHAR_TYPE; + tok->u.subtype = CTYPE_WHITE_SPACE; + break; + + case 'S': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break; + tok->type = TK_CHAR_TYPE; + tok->u.subtype = CTYPE_NOT_WHITE_SPACE; + break; + + case 'd': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break; + tok->type = TK_CHAR_TYPE; + tok->u.subtype = CTYPE_DIGIT; + break; + + case 'D': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break; + tok->type = TK_CHAR_TYPE; + tok->u.subtype = CTYPE_NOT_DIGIT; + break; + + case 'h': + if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break; + tok->type = TK_CHAR_TYPE; + tok->u.subtype = CTYPE_XDIGIT; + break; + + case 'H': + if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break; + tok->type = TK_CHAR_TYPE; + tok->u.subtype = CTYPE_NOT_XDIGIT; + break; + + case 'A': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break; + begin_buf: + tok->type = TK_ANCHOR; + tok->u.subtype = ANCHOR_BEGIN_BUF; + break; + + case 'Z': + if (! 
IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break; + tok->type = TK_ANCHOR; + tok->u.subtype = ANCHOR_SEMI_END_BUF; + break; + + case 'z': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break; + end_buf: + tok->type = TK_ANCHOR; + tok->u.subtype = ANCHOR_END_BUF; + break; + + case 'G': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR)) break; + tok->type = TK_ANCHOR; + tok->u.subtype = ANCHOR_BEGIN_POSITION; + break; + + case '`': + if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break; + goto begin_buf; + break; + + case '\'': + if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break; + goto end_buf; + break; + + case 'x': + if (PEND) break; + + prev = p; + if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) { + PINC; + num = scan_unsigned_hexadecimal_number(&p, end, 8, enc); + if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; + if (!PEND) { + if (ONIGENC_IS_CODE_XDIGIT(enc, PPEEK)) + return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; + } + + if ((p > prev + enc_len(enc, prev)) && !PEND && PPEEK_IS('}')) { + PINC; + tok->type = TK_CODE_POINT; + tok->u.code = (OnigCodePoint )num; + } + else { + /* can't read nothing or invalid format */ + p = prev; + } + } + else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) { + num = scan_unsigned_hexadecimal_number(&p, end, 2, enc); + if (num < 0) return ONIGERR_TOO_BIG_NUMBER; + if (p == prev) { /* can't read nothing. */ + num = 0; /* but, it's not error */ + } + tok->type = TK_RAW_BYTE; + tok->base = 16; + tok->u.c = num; + } + break; + + case 'u': + if (PEND) break; + + prev = p; + if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) { + num = scan_unsigned_hexadecimal_number(&p, end, 4, enc); + if (num < 0) return ONIGERR_TOO_BIG_NUMBER; + if (p == prev) { /* can't read nothing. 
*/ + num = 0; /* but, it's not error */ + } + tok->type = TK_CODE_POINT; + tok->base = 16; + tok->u.code = (OnigCodePoint )num; + } + break; + + case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + PUNFETCH; + prev = p; + num = onig_scan_unsigned_number(&p, end, enc); + if (num < 0 || num > ONIG_MAX_BACKREF_NUM) { + goto skip_backref; + } + + if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) && + (num <= env->num_mem || num <= 9)) { /* This spec. from GNU regex */ + if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) { + if (num > env->num_mem || IS_NULL(SCANENV_MEM_NODES(env)[num])) + return ONIGERR_INVALID_BACKREF; + } + + tok->type = TK_BACKREF; + tok->u.backref.num = 1; + tok->u.backref.ref1 = num; + tok->u.backref.by_name = 0; + break; + } + + skip_backref: + if (c == '8' || c == '9') { + /* normal char */ + p = prev; PINC; + break; + } + + p = prev; + /* fall through */ + case '0': + if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) { + prev = p; + num = scan_unsigned_octal_number(&p, end, (c == '0' ? 2:3), enc); + if (num < 0) return ONIGERR_TOO_BIG_NUMBER; + if (p == prev) { /* can't read nothing. 
*/ + num = 0; /* but, it's not error */ + } + tok->type = TK_RAW_BYTE; + tok->base = 8; + tok->u.c = num; + } + else if (c != '0') { + PINC; + } + break; + +#ifdef USE_NAMED_GROUP + case 'k': + if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_K_NAMED_BACKREF)) { + PFETCH(c); + if (c == '<') { + UChar* name_end; + int* backs; + + prev = p; + r = fetch_name(&p, end, &name_end, env, 1); + if (r < 0) return r; + num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs); + if (num <= 0) { + onig_scan_env_set_error_string(env, + ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end); + return ONIGERR_UNDEFINED_NAME_REFERENCE; + } + if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) { + int i; + for (i = 0; i < num; i++) { + if (backs[i] > env->num_mem || + IS_NULL(SCANENV_MEM_NODES(env)[backs[i]])) + return ONIGERR_INVALID_BACKREF; + } + } + + tok->type = TK_BACKREF; + tok->u.backref.by_name = 1; + if (num == 1) { + tok->u.backref.num = 1; + tok->u.backref.ref1 = backs[0]; + } + else { + tok->u.backref.num = num; + tok->u.backref.refs = backs; + } + } + else + PUNFETCH; + } + break; +#endif + +#ifdef USE_SUBEXP_CALL + case 'g': + if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_SUBEXP_CALL)) { + PFETCH(c); + if (c == '<') { + UChar* name_end; + + prev = p; + r = fetch_name(&p, end, &name_end, env, 1); + if (r < 0) return r; + + tok->type = TK_CALL; + tok->u.call.name = prev; + tok->u.call.name_end = name_end; + } + else + PUNFETCH; + } + break; +#endif + + case 'Q': + if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE)) { + tok->type = TK_QUOTE_OPEN; + } + break; + + case 'p': + case 'P': + if (PPEEK_IS('{') && + IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) { + PINC; + tok->type = TK_CHAR_PROPERTY; + tok->u.prop.not = (c == 'P' ? 1 : 0); + + if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) { + PFETCH(c); + if (c == '^') { + tok->u.prop.not = (tok->u.prop.not == 0 ? 
1 : 0); + } + else + PUNFETCH; + } + } + break; + + default: + PUNFETCH; + num = fetch_escaped_value(&p, end, env); + if (num < 0) return num; + /* set_raw: */ + if (tok->u.c != num) { + tok->type = TK_CODE_POINT; + tok->u.code = (OnigCodePoint )num; + } + else { /* string */ + p = tok->backp + enc_len(enc, tok->backp); + } + break; + } + } + else { + tok->u.c = c; + tok->escaped = 0; + +#ifdef USE_VARIABLE_META_CHARS + if ((c != ONIG_INEFFECTIVE_META_CHAR) && + IS_SYNTAX_OP(syn, ONIG_SYN_OP_VARIABLE_META_CHARACTERS)) { + if (c == MC_ANYCHAR(enc)) + goto any_char; + else if (c == MC_ANYTIME(enc)) + goto anytime; + else if (c == MC_ZERO_OR_ONE_TIME(enc)) + goto zero_or_one_time; + else if (c == MC_ONE_OR_MORE_TIME(enc)) + goto one_or_more_time; + else if (c == MC_ANYCHAR_ANYTIME(enc)) { + tok->type = TK_ANYCHAR_ANYTIME; + goto out; + } + } +#endif + + switch (c) { + case '.': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_DOT_ANYCHAR)) break; + any_char: + tok->type = TK_ANYCHAR; + break; + + case '*': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ASTERISK_ZERO_INF)) break; + anytime: + tok->type = TK_OP_REPEAT; + tok->u.repeat.lower = 0; + tok->u.repeat.upper = REPEAT_INFINITE; + goto greedy_check; + break; + + case '+': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_PLUS_ONE_INF)) break; + one_or_more_time: + tok->type = TK_OP_REPEAT; + tok->u.repeat.lower = 1; + tok->u.repeat.upper = REPEAT_INFINITE; + goto greedy_check; + break; + + case '?': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_ZERO_ONE)) break; + zero_or_one_time: + tok->type = TK_OP_REPEAT; + tok->u.repeat.lower = 0; + tok->u.repeat.upper = 1; + goto greedy_check; + break; + + case '{': + if (! 
IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACE_INTERVAL)) break; + r = fetch_range_qualifier(&p, end, tok, env); + if (r < 0) return r; /* error */ + if (r == 0) goto greedy_check; + else if (r == 2) { /* {n} */ + if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY)) + goto possessive_check; + + goto greedy_check; + } + /* r == 1 : normal char */ + break; + + case '|': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_VBAR_ALT)) break; + tok->type = TK_ALT; + break; + + case '(': + if (PPEEK_IS('?') && + IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) { + PINC; + if (PPEEK_IS('#')) { + PFETCH(c); + while (1) { + if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; + PFETCH(c); + if (c == MC_ESC(enc)) { + if (!PEND) PFETCH(c); + } + else { + if (c == ')') break; + } + } + goto start; + } + PUNFETCH; + } + + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break; + tok->type = TK_SUBEXP_OPEN; + break; + + case ')': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break; + tok->type = TK_SUBEXP_CLOSE; + break; + + case '^': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break; + tok->type = TK_ANCHOR; + tok->u.subtype = (IS_SINGLELINE(env->option) + ? ANCHOR_BEGIN_BUF : ANCHOR_BEGIN_LINE); + break; + + case '$': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break; + tok->type = TK_ANCHOR; + tok->u.subtype = (IS_SINGLELINE(env->option) + ? ANCHOR_END_BUF : ANCHOR_END_LINE); + break; + + case '[': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACKET_CC)) break; + tok->type = TK_CC_OPEN; + break; + + case ']': + if (*src > env->pattern) /* /].../ is allowed. 
*/ + CCEND_ESC_WARN(env, "]"); + break; + + case '#': + if (IS_EXTEND(env->option)) { + while (!PEND) { + PFETCH(c); + if (ONIGENC_IS_CODE_NEWLINE(enc, c)) + break; + } + goto start; + break; + } + break; + + case ' ': case '\t': case '\n': case '\r': case '\f': + if (IS_EXTEND(env->option)) + goto start; + break; + + default: + /* string */ + break; + } + } + + out: + *src = p; + return tok->type; +} + +static int +add_ctype_to_cc_by_range(CClassNode* cc, int ctype, int not, OnigEncoding enc, + OnigCodePoint sbr[], OnigCodePoint mbr[]) +{ + int i, r; + OnigCodePoint j; + + int nsb = ONIGENC_CODE_RANGE_NUM(sbr); + int nmb = ONIGENC_CODE_RANGE_NUM(mbr); + + if (not == 0) { + for (i = 0; i < nsb; i++) { + for (j = ONIGENC_CODE_RANGE_FROM(sbr, i); + j <= ONIGENC_CODE_RANGE_TO(sbr, i); j++) { + BITSET_SET_BIT(cc->bs, j); + } + } + + for (i = 0; i < nmb; i++) { + r = add_code_range_to_buf(&(cc->mbuf), + ONIGENC_CODE_RANGE_FROM(mbr, i), + ONIGENC_CODE_RANGE_TO(mbr, i)); + if (r != 0) return r; + } + } + else { + OnigCodePoint prev = 0; + + if (ONIGENC_MBC_MINLEN(enc) == 1) { + for (i = 0; i < nsb; i++) { + for (j = prev; + j < ONIGENC_CODE_RANGE_FROM(sbr, i); j++) { + BITSET_SET_BIT(cc->bs, j); + } + prev = ONIGENC_CODE_RANGE_TO(sbr, i) + 1; + } + if (prev < 0x7f) { + for (j = prev; j < 0x7f; j++) { + BITSET_SET_BIT(cc->bs, j); + } + } + + prev = 0x80; + } + + for (i = 0; i < nmb; i++) { + if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) { + r = add_code_range_to_buf(&(cc->mbuf), prev, + ONIGENC_CODE_RANGE_FROM(mbr, i) - 1); + if (r != 0) return r; + } + prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1; + } + if (prev < 0x7fffffff) { + r = add_code_range_to_buf(&(cc->mbuf), prev, 0x7fffffff); + if (r != 0) return r; + } + } + + return 0; +} + +static int +add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env) +{ + int c, r; + OnigCodePoint *sbr, *mbr; + OnigEncoding enc = env->enc; + + r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sbr, &mbr); + if (r == 0) { + return 
add_ctype_to_cc_by_range(cc, ctype, not, env->enc, sbr, mbr); + } + else if (r != ONIG_NO_SUPPORT_CONFIG) { + return r; + } + + r = 0; + switch (ctype) { + case ONIGENC_CTYPE_ALPHA: + case ONIGENC_CTYPE_BLANK: + case ONIGENC_CTYPE_CNTRL: + case ONIGENC_CTYPE_DIGIT: + case ONIGENC_CTYPE_LOWER: + case ONIGENC_CTYPE_PUNCT: + case ONIGENC_CTYPE_SPACE: + case ONIGENC_CTYPE_UPPER: + case ONIGENC_CTYPE_XDIGIT: + case ONIGENC_CTYPE_ASCII: + case ONIGENC_CTYPE_ALNUM: + if (not != 0) { + for (c = 0; c < SINGLE_BYTE_SIZE; c++) { + if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) + BITSET_SET_BIT(cc->bs, c); + } + ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); + } + else { + for (c = 0; c < SINGLE_BYTE_SIZE; c++) { + if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) + BITSET_SET_BIT(cc->bs, c); + } + } + break; + + case ONIGENC_CTYPE_GRAPH: + case ONIGENC_CTYPE_PRINT: + if (not != 0) { + for (c = 0; c < SINGLE_BYTE_SIZE; c++) { + if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) + BITSET_SET_BIT(cc->bs, c); + } + } + else { + for (c = 0; c < SINGLE_BYTE_SIZE; c++) { + if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) + BITSET_SET_BIT(cc->bs, c); + } + ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); + } + break; + + case ONIGENC_CTYPE_WORD: + if (not == 0) { + for (c = 0; c < SINGLE_BYTE_SIZE; c++) { + if (ONIGENC_IS_CODE_SB_WORD(enc, c)) BITSET_SET_BIT(cc->bs, c); + } + ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); + } + else { + for (c = 0; c < SINGLE_BYTE_SIZE; c++) { + if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) /* 0: invalid code point */ + && ! 
ONIGENC_IS_CODE_WORD(enc, c)) + BITSET_SET_BIT(cc->bs, c); + } + } + break; + + default: + return ONIGERR_PARSER_BUG; + break; + } + + return r; +} + +static int +parse_ctype_to_enc_ctype(int pctype, int* not) +{ + int ctype; + + switch (pctype) { + case CTYPE_WORD: + ctype = ONIGENC_CTYPE_WORD; + *not = 0; + break; + case CTYPE_NOT_WORD: + ctype = ONIGENC_CTYPE_WORD; + *not = 1; + break; + case CTYPE_WHITE_SPACE: + ctype = ONIGENC_CTYPE_SPACE; + *not = 0; + break; + case CTYPE_NOT_WHITE_SPACE: + ctype = ONIGENC_CTYPE_SPACE; + *not = 1; + break; + case CTYPE_DIGIT: + ctype = ONIGENC_CTYPE_DIGIT; + *not = 0; + break; + case CTYPE_NOT_DIGIT: + ctype = ONIGENC_CTYPE_DIGIT; + *not = 1; + break; + case CTYPE_XDIGIT: + ctype = ONIGENC_CTYPE_XDIGIT; + *not = 0; + break; + case CTYPE_NOT_XDIGIT: + ctype = ONIGENC_CTYPE_XDIGIT; + *not = 1; + break; + default: + return ONIGERR_PARSER_BUG; + break; + } + return ctype; +} + +typedef struct { + UChar *name; + int ctype; + short int len; +} PosixBracketEntryType; + +static int +parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env) +{ +#define POSIX_BRACKET_CHECK_LIMIT_LENGTH 20 +#define POSIX_BRACKET_NAME_MAX_LEN 6 + + static PosixBracketEntryType PBS[] = { + { "alnum", ONIGENC_CTYPE_ALNUM, 5 }, + { "alpha", ONIGENC_CTYPE_ALPHA, 5 }, + { "blank", ONIGENC_CTYPE_BLANK, 5 }, + { "cntrl", ONIGENC_CTYPE_CNTRL, 5 }, + { "digit", ONIGENC_CTYPE_DIGIT, 5 }, + { "graph", ONIGENC_CTYPE_GRAPH, 5 }, + { "lower", ONIGENC_CTYPE_LOWER, 5 }, + { "print", ONIGENC_CTYPE_PRINT, 5 }, + { "punct", ONIGENC_CTYPE_PUNCT, 5 }, + { "space", ONIGENC_CTYPE_SPACE, 5 }, + { "upper", ONIGENC_CTYPE_UPPER, 5 }, + { "xdigit", ONIGENC_CTYPE_XDIGIT, 6 }, + { "ascii", ONIGENC_CTYPE_ASCII, 5 }, /* I don't know origin. Perl? 
*/ + { (UChar* )NULL, -1, 0 } + }; + + PosixBracketEntryType *pb; + int not, i, r; + OnigCodePoint c; + OnigEncoding enc = env->enc; + UChar *p = *src; + PFETCH_READY; + + if (PPEEK_IS('^')) { + PINC; + not = 1; + } + else + not = 0; + + if (onigenc_strlen(enc, p, end) < POSIX_BRACKET_NAME_MAX_LEN + 2) + goto not_posix_bracket; + + for (pb = PBS; IS_NOT_NULL(pb->name); pb++) { + if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) { + p = (UChar* )onigenc_step(enc, p, end, pb->len); + if (onigenc_with_ascii_strncmp(enc, p, end, ":]", 2) != 0) + return ONIGERR_INVALID_POSIX_BRACKET_TYPE; + + r = add_ctype_to_cc(cc, pb->ctype, not, env); + if (r != 0) return r; + + PINC; PINC; + *src = p; + return 0; + } + } + + not_posix_bracket: + c = 0; + i = 0; + while (!PEND && ((c = PPEEK) != ':') && c != ']') { + PINC; + if (++i > POSIX_BRACKET_CHECK_LIMIT_LENGTH) break; + } + if (c == ':' && ! PEND) { + PINC; + if (! PEND) { + PFETCH(c); + if (c == ']') + return ONIGERR_INVALID_POSIX_BRACKET_TYPE; + } + } + + return 1; /* 1: is not POSIX bracket, but no error. 
*/ +} + +static int +property_name_to_ctype(UChar* p, UChar* end, OnigEncoding enc) +{ + static PosixBracketEntryType PBS[] = { + { "Alnum", ONIGENC_CTYPE_ALNUM, 5 }, + { "Alpha", ONIGENC_CTYPE_ALPHA, 5 }, + { "Blank", ONIGENC_CTYPE_BLANK, 5 }, + { "Cntrl", ONIGENC_CTYPE_CNTRL, 5 }, + { "Digit", ONIGENC_CTYPE_DIGIT, 5 }, + { "Graph", ONIGENC_CTYPE_GRAPH, 5 }, + { "Lower", ONIGENC_CTYPE_LOWER, 5 }, + { "Print", ONIGENC_CTYPE_PRINT, 5 }, + { "Punct", ONIGENC_CTYPE_PUNCT, 5 }, + { "Space", ONIGENC_CTYPE_SPACE, 5 }, + { "Upper", ONIGENC_CTYPE_UPPER, 5 }, + { "XDigit", ONIGENC_CTYPE_XDIGIT, 6 }, + { "ASCII", ONIGENC_CTYPE_ASCII, 5 }, + { (UChar* )NULL, -1, 0 } + }; + + PosixBracketEntryType *pb; + int len; + + len = onigenc_strlen(enc, p, end); + for (pb = PBS; IS_NOT_NULL(pb->name); pb++) { + if (len == pb->len && + onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) + return pb->ctype; + } + + return -1; +} + +static int +fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env) +{ + int ctype; + OnigCodePoint c; + OnigEncoding enc = env->enc; + UChar *prev, *start, *p = *src; + PFETCH_READY; + + /* 'IsXXXX' => 'XXXX' */ + if (!PEND && + IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS)) { + c = PPEEK; + if (c == 'I') { + PINC; + if (! 
PEND) { + c = PPEEK; + if (c == 's') + PINC; + else + PUNFETCH; + } + } + } + + start = prev = p; + + while (!PEND) { + prev = p; + PFETCH(c); + if (c == '}') { + ctype = property_name_to_ctype(start, prev, enc); + if (ctype < 0) break; + + *src = p; + return ctype; + } + else if (c == '(' || c == ')' || c == '{' || c == '|') + break; + } + + onig_scan_env_set_error_string(env, ONIGERR_INVALID_CHAR_PROPERTY_NAME, + *src, prev); + return ONIGERR_INVALID_CHAR_PROPERTY_NAME; +} + +static int +parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end, + ScanEnv* env) +{ + int r, ctype; + CClassNode* cc; + + ctype = fetch_char_property_to_ctype(src, end, env); + if (ctype < 0) return ctype; + + *np = node_new_cclass(); + CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + cc = &(NCCLASS(*np)); + r = add_ctype_to_cc(cc, ctype, 0, env); + if (r != 0) return r; + if (tok->u.prop.not != 0) CCLASS_SET_NOT(cc); + + return 0; +} + + +enum CCSTATE { + CCS_VALUE, + CCS_RANGE, + CCS_COMPLETE, + CCS_START +}; + +enum CCVALTYPE { + CCV_SB, + CCV_CODE_POINT, + CCV_CLASS +}; + +static int +next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type, + enum CCSTATE* state, ScanEnv* env) +{ + int r; + + if (*state == CCS_RANGE) + return ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE; + + if (*state == CCS_VALUE && *type != CCV_CLASS) { + if (*type == CCV_SB) + BITSET_SET_BIT(cc->bs, (int )(*vs)); + else if (*type == CCV_CODE_POINT) { + r = add_code_range(&(cc->mbuf), env, *vs, *vs); + if (r < 0) return r; + } + } + + *state = CCS_VALUE; + *type = CCV_CLASS; + return 0; +} + +static int +next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v, + int* vs_israw, int v_israw, + enum CCVALTYPE intype, enum CCVALTYPE* type, + enum CCSTATE* state, ScanEnv* env) +{ + int r; + + switch (*state) { + case CCS_VALUE: + if (*type == CCV_SB) + BITSET_SET_BIT(cc->bs, (int )(*vs)); + else if (*type == CCV_CODE_POINT) { + r = add_code_range(&(cc->mbuf), env, *vs, *vs); + if (r < 
0) return r; + } + break; + + case CCS_RANGE: + if (intype == *type) { + if (intype == CCV_SB) { + if (*vs > 0xff || v > 0xff) + return ONIGERR_INVALID_WIDE_CHAR_VALUE; + + if (*vs > v) { + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC)) + goto ccs_range_end; + else + return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS; + } + bitset_set_range(cc->bs, (int )*vs, (int )v); + } + else { + r = add_code_range(&(cc->mbuf), env, *vs, v); + if (r < 0) return r; + } + } + else { +#if 0 + if (intype == CCV_CODE_POINT && *type == CCV_SB) { +#endif + if (*vs > v) { + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC)) + goto ccs_range_end; + else + return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS; + } + bitset_set_range(cc->bs, (int )*vs, (int )(v < 0xff ? v : 0xff)); + r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*vs, v); + if (r < 0) return r; +#if 0 + } + else + return ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE; +#endif + } + ccs_range_end: + *state = CCS_COMPLETE; + break; + + case CCS_COMPLETE: + case CCS_START: + *state = CCS_VALUE; + break; + + default: + break; + } + + *vs_israw = v_israw; + *vs = v; + *type = intype; + return 0; +} + +static int +code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped, + OnigEncoding enc) +{ + int in_esc; + OnigCodePoint code; + UChar* p = from; + PFETCH_READY; + + in_esc = 0; + while (! 
PEND) { + if (ignore_escaped && in_esc) { + in_esc = 0; + } + else { + PFETCH(code); + if (code == c) return 1; + if (code == MC_ESC(enc)) in_esc = 1; + } + } + return 0; +} + +static int +parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, + ScanEnv* env) +{ + int r, neg, len, fetched, and_start; + OnigCodePoint v, vs; + UChar *p; + Node* node; + CClassNode *cc, *prev_cc; + CClassNode work_cc; + + enum CCSTATE state; + enum CCVALTYPE val_type, in_type; + int val_israw, in_israw; + + prev_cc = (CClassNode* )NULL; + *np = NULL_NODE; + r = fetch_token_in_cc(tok, src, end, env); + if (r == TK_CHAR && tok->u.c == '^' && tok->escaped == 0) { + neg = 1; + r = fetch_token_in_cc(tok, src, end, env); + } + else { + neg = 0; + } + + if (r < 0) return r; + if (r == TK_CC_CLOSE) { + if (! code_exist_check((OnigCodePoint )']', + *src, env->pattern_end, 1, env->enc)) + return ONIGERR_EMPTY_CHAR_CLASS; + + CC_ESC_WARN(env, "]"); + r = tok->type = TK_CHAR; /* allow []...] */ + } + + *np = node = node_new_cclass(); + CHECK_NULL_RETURN_VAL(node, ONIGERR_MEMORY); + cc = &(NCCLASS(node)); + + and_start = 0; + state = CCS_START; + p = *src; + while (r != TK_CC_CLOSE) { + fetched = 0; + switch (r) { + case TK_CHAR: + len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.c); + if (len > 1) { + in_type = CCV_CODE_POINT; + } + else { + sb_char: + in_type = CCV_SB; + } + v = (OnigCodePoint )tok->u.c; + in_israw = 0; + goto val_entry2; + break; + + case TK_RAW_BYTE: + /* tok->base != 0 : octal or hexadec. */ + if (! 
ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base != 0) { + UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; + UChar* bufe = buf + ONIGENC_CODE_TO_MBC_MAXLEN; + UChar* psave = p; + int i, base = tok->base; + + buf[0] = tok->u.c; + for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) { + r = fetch_token_in_cc(tok, &p, end, env); + if (r < 0) goto err; + if (r != TK_RAW_BYTE || tok->base != base) { + fetched = 1; + break; + } + buf[i] = tok->u.c; + } + + if (i < ONIGENC_MBC_MINLEN(env->enc)) { + r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING; + goto err; + } + + len = enc_len(env->enc, buf); + if (i < len) { + r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING; + goto err; + } + else if (i > len) { /* fetch back */ + p = psave; + for (i = 1; i < len; i++) { + r = fetch_token_in_cc(tok, &p, end, env); + } + fetched = 0; + } + + if (i == 1) { + v = (OnigCodePoint )buf[0]; + goto raw_single; + } + else { + v = ONIGENC_MBC_TO_CODE(env->enc, buf, bufe); + in_type = CCV_CODE_POINT; + } + } + else { + v = (OnigCodePoint )tok->u.c; + raw_single: + in_type = CCV_SB; + } + in_israw = 1; + goto val_entry2; + break; + + case TK_CODE_POINT: + v = tok->u.code; + in_israw = 1; + val_entry: + len = ONIGENC_CODE_TO_MBCLEN(env->enc, v); + if (len < 0) { + r = len; + goto err; + } + in_type = (len == 1 ? 
CCV_SB : CCV_CODE_POINT); + val_entry2: + r = next_state_val(cc, &vs, v, &val_israw, in_israw, in_type, &val_type, + &state, env); + if (r != 0) goto err; + break; + + case TK_POSIX_BRACKET_OPEN: + r = parse_posix_bracket(cc, &p, end, env); + if (r < 0) goto err; + if (r == 1) { /* is not POSIX bracket */ + CC_ESC_WARN(env, "["); + p = tok->backp; + v = (OnigCodePoint )tok->u.c; + in_israw = 0; + goto val_entry; + } + goto next_class; + break; + + case TK_CHAR_TYPE: + { + int ctype, not; + ctype = parse_ctype_to_enc_ctype(tok->u.subtype, ¬); + r = add_ctype_to_cc(cc, ctype, not, env); + if (r != 0) return r; + } + + next_class: + r = next_state_class(cc, &vs, &val_type, &state, env); + if (r != 0) goto err; + break; + + case TK_CHAR_PROPERTY: + { + int ctype; + + ctype = fetch_char_property_to_ctype(&p, end, env); + if (ctype < 0) return ctype; + r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, env); + if (r != 0) return r; + goto next_class; + } + break; + + case TK_CC_RANGE: + if (state == CCS_VALUE) { + r = fetch_token_in_cc(tok, &p, end, env); + if (r < 0) goto err; + fetched = 1; + if (r == TK_CC_CLOSE) { /* allow [x-] */ + range_end_val: + v = (OnigCodePoint )'-'; + in_israw = 0; + goto val_entry; + } + else if (r == TK_CC_AND) { + CC_ESC_WARN(env, "-"); + goto range_end_val; + } + state = CCS_RANGE; + } + else if (state == CCS_START) { + /* [-xa] is allowed */ + v = (OnigCodePoint )tok->u.c; + in_israw = 0; + + r = fetch_token_in_cc(tok, &p, end, env); + if (r < 0) goto err; + fetched = 1; + /* [--x] or [a&&-x] is warned. 
*/ + if (r == TK_CC_RANGE || and_start != 0) + CC_ESC_WARN(env, "-"); + + goto val_entry; + } + else if (state == CCS_RANGE) { + CC_ESC_WARN(env, "-"); + goto sb_char; /* [!--x] is allowed */ + } + else { /* CCS_COMPLETE */ + r = fetch_token_in_cc(tok, &p, end, env); + if (r < 0) goto err; + fetched = 1; + if (r == TK_CC_CLOSE) goto range_end_val; /* allow [a-b-] */ + else if (r == TK_CC_AND) { + CC_ESC_WARN(env, "-"); + goto range_end_val; + } + + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) { + CC_ESC_WARN(env, "-"); + goto sb_char; /* [0-9-a] is allowed as [0-9\-a] */ + } + r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS; + goto err; + } + break; + + case TK_CC_CC_OPEN: /* [ */ + { + Node *anode; + CClassNode* acc; + + r = parse_char_class(&anode, tok, &p, end, env); + if (r != 0) goto cc_open_err; + acc = &(NCCLASS(anode)); + r = or_cclass(cc, acc, env->enc); + + onig_node_free(anode); + cc_open_err: + if (r != 0) goto err; + } + break; + + case TK_CC_AND: /* && */ + { + if (state == CCS_VALUE) { + r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type, + &val_type, &state, env); + if (r != 0) goto err; + } + /* initialize local variables */ + and_start = 1; + state = CCS_START; + + if (IS_NOT_NULL(prev_cc)) { + r = and_cclass(prev_cc, cc, env->enc); + if (r != 0) goto err; + bbuf_free(cc->mbuf); + } + else { + prev_cc = cc; + cc = &work_cc; + } + initialize_cclass(cc); + } + break; + + case TK_EOT: + r = ONIGERR_PREMATURE_END_OF_CHAR_CLASS; + goto err; + break; + default: + r = ONIGERR_PARSER_BUG; + goto err; + break; + } + + if (fetched) + r = tok->type; + else { + r = fetch_token_in_cc(tok, &p, end, env); + if (r < 0) goto err; + } + } + + if (state == CCS_VALUE) { + r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type, + &val_type, &state, env); + if (r != 0) goto err; + } + + if (IS_NOT_NULL(prev_cc)) { + r = and_cclass(prev_cc, cc, env->enc); + if (r != 0) goto err; + bbuf_free(cc->mbuf); + cc = prev_cc; + } + + if (neg != 0) 
+ CCLASS_SET_NOT(cc); + else + CCLASS_CLEAR_NOT(cc); + if (IS_CCLASS_NOT(cc) && + IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) { + int is_empty; + + is_empty = (IS_NULL(cc->mbuf) ? 1 : 0); + if (is_empty != 0) + BITSET_IS_EMPTY(cc->bs, is_empty); + + if (is_empty == 0) { +#define NEWLINE_CODE 0x0a + + if (ONIGENC_IS_CODE_NEWLINE(env->enc, NEWLINE_CODE)) { + if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1) + BITSET_SET_BIT(cc->bs, NEWLINE_CODE); + else + add_code_range(&(cc->mbuf), env, NEWLINE_CODE, NEWLINE_CODE); + } + } + } + *src = p; + return 0; + + err: + if (cc != &(NCCLASS(*np))) + bbuf_free(cc->mbuf); + onig_node_free(*np); + return r; +} + +static int parse_subexp(Node** top, OnigToken* tok, int term, + UChar** src, UChar* end, ScanEnv* env); + +static int +parse_effect(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, + ScanEnv* env) +{ + int r, num; + int list_capture; + Node *target; + OnigOptionType option; + OnigEncoding enc = env->enc; + OnigCodePoint c; + UChar* p = *src; + PFETCH_READY; + + *np = NULL; + if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS; + + option = env->option; + if (PPEEK_IS('?') && + IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) { + PINC; + if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; + + PFETCH(c); + switch (c) { + case ':': /* (?:...) grouping only */ + group: + r = fetch_token(tok, &p, end, env); + if (r < 0) return r; + r = parse_subexp(np, tok, term, &p, end, env); + if (r < 0) return r; + *src = p; + return 1; /* group */ + break; + + case '=': + *np = onig_node_new_anchor(ANCHOR_PREC_READ); + break; + case '!': /* preceding read */ + *np = onig_node_new_anchor(ANCHOR_PREC_READ_NOT); + break; + case '>': /* (?>...) stop backtrack */ + *np = node_new_effect(EFFECT_STOP_BACKTRACK); + break; + + case '<': /* look behind (?<=...), (?<!...) 
*/ + PFETCH(c); + if (c == '=') + *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND); + else if (c == '!') + *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND_NOT); +#ifdef USE_NAMED_GROUP + else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) { + UChar *name; + UChar *name_end; + + PUNFETCH; + list_capture = 0; + + named_group: + name = p; + r = fetch_name(&p, end, &name_end, env, 0); + if (r < 0) return r; + + num = scan_env_add_mem_entry(env); + if (num < 0) return num; + if (list_capture != 0 && num >= BIT_STATUS_BITS_NUM) + return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY; + + r = name_add(env->reg, name, name_end, num, env); + if (r != 0) return r; + *np = node_new_effect_memory(env->option, 1); + CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + NEFFECT(*np).regnum = num; + if (list_capture != 0) + BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num); + env->num_named++; + } +#endif + else + return ONIGERR_UNDEFINED_GROUP_OPTION; + break; + + case '@': + if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)) { +#ifdef USE_NAMED_GROUP + if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) { + PFETCH(c); + if (c == '<') { + list_capture = 1; + goto named_group; /* (?@<name>...) 
*/ + } + PUNFETCH; + } +#endif + *np = node_new_effect_memory(env->option, 0); + CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + num = scan_env_add_mem_entry(env); + if (num < 0) { + onig_node_free(*np); + return num; + } + else if (num >= BIT_STATUS_BITS_NUM) { + onig_node_free(*np); + return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY; + } + NEFFECT(*np).regnum = num; + BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num); + } + else { + return ONIGERR_UNDEFINED_GROUP_OPTION; + } + break; + +#ifdef USE_POSIXLINE_OPTION + case 'p': +#endif + case '-': case 'i': case 'm': case 's': case 'x': + { + int neg = 0; + + while (1) { + switch (c) { + case ':': + case ')': + break; + + case '-': neg = 1; break; + case 'x': ONOFF(option, ONIG_OPTION_EXTEND, neg); break; + case 'i': ONOFF(option, ONIG_OPTION_IGNORECASE, neg); break; + case 's': + if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) { + ONOFF(option, ONIG_OPTION_MULTILINE, neg); + } + else + return ONIGERR_UNDEFINED_GROUP_OPTION; + break; + + case 'm': + if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) { + ONOFF(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? 
1 : 0)); + } + else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) { + ONOFF(option, ONIG_OPTION_MULTILINE, neg); + } + else + return ONIGERR_UNDEFINED_GROUP_OPTION; + break; +#ifdef USE_POSIXLINE_OPTION + case 'p': + ONOFF(option, ONIG_OPTION_MULTILINE|ONIG_OPTION_SINGLELINE, neg); + break; +#endif + default: + return ONIGERR_UNDEFINED_GROUP_OPTION; + } + + if (c == ')') { + *np = node_new_option(option); + CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + *src = p; + return 2; /* option only */ + } + else if (c == ':') { + OnigOptionType prev = env->option; + + env->option = option; + r = fetch_token(tok, &p, end, env); + if (r < 0) return r; + r = parse_subexp(&target, tok, term, &p, end, env); + env->option = prev; + if (r < 0) return r; + *np = node_new_option(option); + CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + NEFFECT(*np).target = target; + *src = p; + return 0; + } + + if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; + PFETCH(c); + } + } + break; + + default: + return ONIGERR_UNDEFINED_GROUP_OPTION; + } + } + else { +#ifdef USE_NAMED_GROUP + if (ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_DONT_CAPTURE_GROUP)) + goto group; +#endif + *np = node_new_effect_memory(env->option, 0); + CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + num = scan_env_add_mem_entry(env); + if (num < 0) return num; + NEFFECT(*np).regnum = num; + } + + CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + r = fetch_token(tok, &p, end, env); + if (r < 0) return r; + r = parse_subexp(&target, tok, term, &p, end, env); + if (r < 0) return r; + + if (NTYPE(*np) == N_ANCHOR) + NANCHOR(*np).target = target; + else { + NEFFECT(*np).target = target; + if (NEFFECT(*np).type == EFFECT_MEMORY) { + /* Don't move this to previous of parse_subexp() */ + r = scan_env_set_mem_node(env, NEFFECT(*np).regnum, *np); + if (r != 0) return r; + } + } + + *src = p; + return 0; +} + +static char* PopularQStr[] = { + "?", "*", "+", "??", "*?", "+?" 
+}; + +static char* ReduceQStr[] = { + "", "", "*", "*?", "??", "+ and ??", "+? and ?" +}; + +static int +set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env) +{ + QualifierNode* qn; + + qn = &(NQUALIFIER(qnode)); + if (qn->lower == 1 && qn->upper == 1) { + return 1; + } + + switch (NTYPE(target)) { + case N_STRING: + if (! group) { + StrNode* sn = &(NSTRING(target)); + if (str_node_can_be_split(sn, env->enc)) { + Node* n = str_node_split_last_char(sn, env->enc); + if (IS_NOT_NULL(n)) { + qn->target = n; + return 2; + } + } + } + break; + + case N_QUALIFIER: + { /* check redundant double repeat. */ + /* verbose warn (?:.?)? etc... but not warn (.?)? etc... */ + QualifierNode* qnt = &(NQUALIFIER(target)); + +#ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR + if (qn->by_number == 0 && qnt->by_number == 0 && + IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) { + int nestq_num, targetq_num; + char buf[WARN_BUFSIZE]; + + nestq_num = popular_qualifier_num(qn); + targetq_num = popular_qualifier_num(qnt); + + switch(ReduceTypeTable[targetq_num][nestq_num]) { + case RQ_ASIS: + break; + + case RQ_DEL: + if (onig_verb_warn != onig_null_warn) { + onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc, + env->pattern, env->pattern_end, + "redundant nested repeat operator"); + (*onig_verb_warn)(buf); + } + goto warn_exit; + break; + + default: + if (onig_verb_warn != onig_null_warn) { + onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc, + env->pattern, env->pattern_end, + "nested repeat operator %s and %s was replaced with '%s'", + PopularQStr[targetq_num], PopularQStr[nestq_num], + ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]); + (*onig_verb_warn)(buf); + } + goto warn_exit; + break; + } + } + + warn_exit: +#endif + if (popular_qualifier_num(qnt) >= 0 && popular_qualifier_num(qn) >= 0) { + onig_reduce_nested_qualifier(qnode, target); + goto q_exit; + } + } + break; + + default: + break; + } + + qn->target = target; + q_exit: + return 
0; +} + +static int +make_compound_alt_node_from_cc(OnigAmbigType ambig_flag, OnigEncoding enc, + CClassNode* cc, Node** root) +{ + int r, i, j, k, clen, len, ncode, n; + UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; + Node **ptail, *snode = NULL_NODE; + OnigCompAmbigCodes* ccs; + OnigCompAmbigCodeItem* ci; + OnigAmbigType amb; + + n = 0; + *root = NULL_NODE; + ptail = root; + + + for (amb = 0x01; amb <= ONIGENC_AMBIGUOUS_MATCH_LIMIT; amb <<= 1) { + if ((amb & ambig_flag) == 0) continue; + + ncode = ONIGENC_GET_ALL_COMP_AMBIG_CODES(enc, amb, &ccs); + for (i = 0; i < ncode; i++) { + if (onig_is_code_in_cc(enc, ccs[i].code, cc)) { + for (j = 0; j < ccs[i].n; j++) { + ci = &(ccs[i].items[j]); + if (ci->len > 1) { /* compound only */ + if (IS_CCLASS_NOT(cc)) clear_not_flag_cclass(cc, enc); + + clen = ci->len; + for (k = 0; k < clen; k++) { + len = ONIGENC_CODE_TO_MBC(enc, ci->code[k], buf); + + if (k == 0) { + snode = node_new_str_raw(buf, buf + len); + CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY); + } + else { + r = onig_node_str_cat(snode, buf, buf + len); + if (r < 0) return r; + } + } + + *ptail = node_new_alt(snode, NULL_NODE); + CHECK_NULL_RETURN_VAL(*ptail, ONIGERR_MEMORY); + ptail = &(NCONS(*ptail).right); + n++; + } + } + } + } + } + + return n; +} + + +#ifdef USE_SHARED_CCLASS_TABLE + +#define THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS 8 + +/* for ctype node hash table */ + +typedef struct { + OnigEncoding enc; + int not; + int type; +} type_cclass_key; + +static int type_cclass_cmp(type_cclass_key* x, type_cclass_key* y) +{ + if (x->type != y->type) return 1; + if (x->enc != y->enc) return 1; + if (x->not != y->not) return 1; + return 0; +} + +static int type_cclass_hash(type_cclass_key* key) +{ + int i, val; + unsigned char *p; + + val = 0; + + p = (unsigned char* )&(key->enc); + for (i = 0; i < sizeof(key->enc); i++) { + val = val * 997 + (int )*p++; + } + + p = (unsigned char* )(&key->type); + for (i = 0; i < sizeof(key->type); i++) { + val = val * 997 + (int )*p++; + 
} + + val += key->not; + return val + (val >> 5); +} + +static struct st_hash_type type_type_cclass_hash = { + type_cclass_cmp, + type_cclass_hash, +}; + +static st_table* OnigTypeCClassTable; + + +static int +i_free_shared_class(type_cclass_key* key, Node* node, void* arg) +{ + if (IS_NOT_NULL(node)) { + CClassNode* cc = &(NCCLASS(node)); + if (IS_NOT_NULL(cc->mbuf)) xfree(cc->mbuf); + xfree(node); + } + return ST_DELETE; +} + +extern int +onig_free_shared_cclass_table() +{ + if (IS_NOT_NULL(OnigTypeCClassTable)) { + onig_st_foreach(OnigTypeCClassTable, i_free_shared_class, 0); + } + + return 0; +} + +#endif /* USE_SHARED_CCLASS_TABLE */ + + +static int +parse_exp(Node** np, OnigToken* tok, int term, + UChar** src, UChar* end, ScanEnv* env) +{ + int r, len, group = 0; + Node* qn; + Node** targetp; + + *np = NULL; + if (tok->type == term) + goto end_of_token; + + switch (tok->type) { + case TK_ALT: + case TK_EOT: + end_of_token: + *np = node_new_empty(); + return tok->type; + break; + + case TK_SUBEXP_OPEN: + r = parse_effect(np, tok, TK_SUBEXP_CLOSE, src, end, env); + if (r < 0) return r; + if (r == 1) group = 1; + else if (r == 2) { /* option only */ + Node* target; + OnigOptionType prev = env->option; + + env->option = NEFFECT(*np).option; + r = fetch_token(tok, src, end, env); + if (r < 0) return r; + r = parse_subexp(&target, tok, term, src, end, env); + env->option = prev; + if (r < 0) return r; + NEFFECT(*np).target = target; + return tok->type; + } + break; + + case TK_SUBEXP_CLOSE: + if (! 
IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP)) + return ONIGERR_UNMATCHED_CLOSE_PARENTHESIS; + + if (tok->escaped) goto tk_raw_byte; + else goto tk_byte; + break; + + case TK_STRING: + tk_byte: + { + *np = node_new_str(tok->backp, *src); + CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + + while (1) { + r = fetch_token(tok, src, end, env); + if (r < 0) return r; + if (r != TK_STRING) break; + + r = onig_node_str_cat(*np, tok->backp, *src); + if (r < 0) return r; + } + + string_end: + targetp = np; + goto repeat; + } + break; + + case TK_RAW_BYTE: + tk_raw_byte: + { + *np = node_new_str_raw_char((UChar )tok->u.c); + CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + len = 1; + while (1) { + r = fetch_token(tok, src, end, env); + if (r < 0) return r; + if (r != TK_RAW_BYTE) { +#ifndef NUMBERED_CHAR_IS_NOT_CASE_AMBIG + if (len >= enc_len(env->enc, NSTRING(*np).s)) { + NSTRING_CLEAR_RAW(*np); + } +#endif + goto string_end; + } + + r = node_str_cat_char(*np, (UChar )tok->u.c); + if (r < 0) return r; + len++; + } + } + break; + + case TK_CODE_POINT: + { + UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; + int num = ONIGENC_CODE_TO_MBC(env->enc, tok->u.code, buf); + if (num < 0) return num; +#ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG + *np = node_new_str_raw(buf, buf + num); +#else + *np = node_new_str(buf, buf + num); +#endif + CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + } + break; + + case TK_QUOTE_OPEN: + { + OnigCodePoint end_op[2]; + UChar *qstart, *qend, *nextp; + + end_op[0] = (OnigCodePoint )MC_ESC(env->enc); + end_op[1] = (OnigCodePoint )'E'; + qstart = *src; + qend = find_str_position(end_op, 2, qstart, end, &nextp, env->enc); + if (IS_NULL(qend)) { + nextp = qend = end; + } + *np = node_new_str(qstart, qend); + CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + *src = nextp; + } + break; + + case TK_CHAR_TYPE: + { + switch (tok->u.subtype) { + case CTYPE_WORD: + case CTYPE_NOT_WORD: + *np = node_new_ctype(tok->u.subtype); + CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); 
+ break; + + case CTYPE_WHITE_SPACE: + case CTYPE_NOT_WHITE_SPACE: + case CTYPE_DIGIT: + case CTYPE_NOT_DIGIT: + case CTYPE_XDIGIT: + case CTYPE_NOT_XDIGIT: + { + CClassNode* cc; + int ctype, not; + +#ifdef USE_SHARED_CCLASS_TABLE + OnigCodePoint *sbr, *mbr; + + ctype = parse_ctype_to_enc_ctype(tok->u.subtype, ¬); + r = ONIGENC_GET_CTYPE_CODE_RANGE(env->enc, ctype, &sbr, &mbr); + if (r == 0 && + ONIGENC_CODE_RANGE_NUM(mbr) + >= THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS) { + type_cclass_key key; + type_cclass_key* new_key; + + key.enc = env->enc; + key.not = not; + key.type = ctype; + + THREAD_ATOMIC_START; + + if (IS_NULL(OnigTypeCClassTable)) { + OnigTypeCClassTable + = onig_st_init_table_with_size(&type_type_cclass_hash, 10); + if (IS_NULL(OnigTypeCClassTable)) { + THREAD_ATOMIC_END; + return ONIGERR_MEMORY; + } + } + else { + if (onig_st_lookup(OnigTypeCClassTable, (st_data_t )&key, + (st_data_t* )np)) { + THREAD_ATOMIC_END; + break; + } + } + + *np = node_new_cclass_by_codepoint_range(not, sbr, mbr); + if (IS_NULL(*np)) { + THREAD_ATOMIC_END; + return ONIGERR_MEMORY; + } + + CCLASS_SET_SHARE(&(NCCLASS(*np))); + new_key = (type_cclass_key* )xmalloc(sizeof(type_cclass_key)); + onig_st_add_direct(OnigTypeCClassTable, (st_data_t )new_key, + (st_data_t )*np); + + THREAD_ATOMIC_END; + } + else { +#endif + ctype = parse_ctype_to_enc_ctype(tok->u.subtype, ¬); + *np = node_new_cclass(); + CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + cc = &(NCCLASS(*np)); + add_ctype_to_cc(cc, ctype, 0, env); + if (not != 0) CCLASS_SET_NOT(cc); +#ifdef USE_SHARED_CCLASS_TABLE + } +#endif + } + break; + + default: + return ONIGERR_PARSER_BUG; + break; + } + } + break; + + case TK_CHAR_PROPERTY: + r = parse_char_property(np, tok, src, end, env); + if (r != 0) return r; + break; + + case TK_CC_OPEN: + { + CClassNode* cc; + + r = parse_char_class(np, tok, src, end, env); + if (r != 0) return r; + + cc = &(NCCLASS(*np)); + + if (IS_IGNORECASE(env->option)) { + int i, n, in_cc; + 
OnigPairAmbigCodes* ccs; + BitSetRef bs = cc->bs; + OnigAmbigType amb; + + for (amb = 0x01; amb <= ONIGENC_AMBIGUOUS_MATCH_LIMIT; amb <<= 1) { + if ((amb & env->ambig_flag) == 0) continue; + + n = ONIGENC_GET_ALL_PAIR_AMBIG_CODES(env->enc, amb, &ccs); + for (i = 0; i < n; i++) { + in_cc = onig_is_code_in_cc(env->enc, ccs[i].from, cc); + + if ((in_cc != 0 && !IS_CCLASS_NOT(cc)) || + (in_cc == 0 && IS_CCLASS_NOT(cc))) { + if (ONIGENC_MBC_MINLEN(env->enc) > 1 || + ccs[i].from >= SINGLE_BYTE_SIZE) { + /* if (cc->not) clear_not_flag_cclass(cc, env->enc); */ + add_code_range(&(cc->mbuf), env, ccs[i].to, ccs[i].to); + } + else { + if (BITSET_AT(bs, ccs[i].from)) { + /* /(?i:[^A-C])/.match("a") ==> fail. */ + BITSET_SET_BIT(bs, ccs[i].to); + } + if (BITSET_AT(bs, ccs[i].to)) { + BITSET_SET_BIT(bs, ccs[i].from); + } + } + } + } + } + } + + if (IS_IGNORECASE(env->option) && + (env->ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { + int res; + Node *alt_root, *work; + + res = make_compound_alt_node_from_cc(env->ambig_flag, env->enc, + cc, &alt_root); + if (res < 0) return res; + if (res > 0) { + work = node_new_alt(*np, alt_root); + if (IS_NULL(work)) { + onig_node_free(alt_root); + return ONIGERR_MEMORY; + } + *np = work; + } + } + } + break; + + case TK_ANYCHAR: + *np = node_new_anychar(); + CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + break; + + case TK_ANYCHAR_ANYTIME: + *np = node_new_anychar(); + CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + qn = node_new_qualifier(0, REPEAT_INFINITE, 0); + CHECK_NULL_RETURN_VAL(qn, ONIGERR_MEMORY); + NQUALIFIER(qn).target = *np; + *np = qn; + break; + + case TK_BACKREF: + len = tok->u.backref.num; + *np = node_new_backref(len, + (len > 1 ? 
tok->u.backref.refs : &(tok->u.backref.ref1)), + tok->u.backref.by_name, env); + CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + break; + +#ifdef USE_SUBEXP_CALL + case TK_CALL: + *np = node_new_call(tok->u.call.name, tok->u.call.name_end); + CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); + env->num_call++; + break; +#endif + + case TK_ANCHOR: + *np = onig_node_new_anchor(tok->u.anchor); + break; + + case TK_OP_REPEAT: + case TK_INTERVAL: + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS)) { + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS)) + return ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED; + else + *np = node_new_empty(); + } + else { + goto tk_byte; + } + break; + + default: + return ONIGERR_PARSER_BUG; + break; + } + + { + targetp = np; + + re_entry: + r = fetch_token(tok, src, end, env); + if (r < 0) return r; + + repeat: + if (r == TK_OP_REPEAT || r == TK_INTERVAL) { + if (is_invalid_qualifier_target(*targetp)) + return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID; + + qn = node_new_qualifier(tok->u.repeat.lower, tok->u.repeat.upper, + (r == TK_INTERVAL ? 
1 : 0)); + CHECK_NULL_RETURN_VAL(qn, ONIGERR_MEMORY); + NQUALIFIER(qn).greedy = tok->u.repeat.greedy; + r = set_qualifier(qn, *targetp, group, env); + if (r < 0) return r; + + if (tok->u.repeat.possessive != 0) { + Node* en; + en = node_new_effect(EFFECT_STOP_BACKTRACK); + CHECK_NULL_RETURN_VAL(en, ONIGERR_MEMORY); + NEFFECT(en).target = qn; + qn = en; + } + + if (r == 0) { + *targetp = qn; + } + else if (r == 2) { /* split case: /abc+/ */ + Node *tmp; + + *targetp = node_new_list(*targetp, NULL); + CHECK_NULL_RETURN_VAL(*targetp, ONIGERR_MEMORY); + tmp = NCONS(*targetp).right = node_new_list(qn, NULL); + CHECK_NULL_RETURN_VAL(tmp, ONIGERR_MEMORY); + targetp = &(NCONS(tmp).left); + } + goto re_entry; + } + } + + return r; +} + +static int +parse_branch(Node** top, OnigToken* tok, int term, + UChar** src, UChar* end, ScanEnv* env) +{ + int r; + Node *node, **headp; + + *top = NULL; + r = parse_exp(&node, tok, term, src, end, env); + if (r < 0) return r; + + if (r == TK_EOT || r == term || r == TK_ALT) { + *top = node; + } + else { + *top = node_new_list(node, NULL); + headp = &(NCONS(*top).right); + while (r != TK_EOT && r != term && r != TK_ALT) { + r = parse_exp(&node, tok, term, src, end, env); + if (r < 0) return r; + + if (NTYPE(node) == N_LIST) { + *headp = node; + while (IS_NOT_NULL(NCONS(node).right)) node = NCONS(node).right; + headp = &(NCONS(node).right); + } + else { + *headp = node_new_list(node, NULL); + headp = &(NCONS(*headp).right); + } + } + } + + return r; +} + +/* term_tok: TK_EOT or TK_SUBEXP_CLOSE */ +static int +parse_subexp(Node** top, OnigToken* tok, int term, + UChar** src, UChar* end, ScanEnv* env) +{ + int r; + Node *node, **headp; + + *top = NULL; + r = parse_branch(&node, tok, term, src, end, env); + if (r < 0) { + onig_node_free(node); + return r; + } + + if (r == term) { + *top = node; + } + else if (r == TK_ALT) { + *top = node_new_alt(node, NULL); + headp = &(NCONS(*top).right); + while (r == TK_ALT) { + r = fetch_token(tok, src, 
end, env); + if (r < 0) return r; + r = parse_branch(&node, tok, term, src, end, env); + if (r < 0) return r; + + *headp = node_new_alt(node, NULL); + headp = &(NCONS(*headp).right); + } + + if (tok->type != term) + goto err; + } + else { + err: + if (term == TK_SUBEXP_CLOSE) + return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS; + else + return ONIGERR_PARSER_BUG; + } + + return r; +} + +static int +parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env) +{ + int r; + OnigToken tok; + + r = fetch_token(&tok, src, end, env); + if (r < 0) return r; + r = parse_subexp(top, &tok, TK_EOT, src, end, env); + if (r < 0) return r; + return 0; +} + +extern int +onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end, regex_t* reg, + ScanEnv* env) +{ + int r; + UChar* p; + +#ifdef USE_NAMED_GROUP + names_clear(reg); +#endif + + scan_env_clear(env); + env->option = reg->options; + env->ambig_flag = reg->ambig_flag; + env->enc = reg->enc; + env->syntax = reg->syntax; + env->pattern = (UChar* )pattern; + env->pattern_end = (UChar* )end; + env->reg = reg; + + *root = NULL; + p = (UChar* )pattern; + r = parse_regexp(root, &p, (UChar* )end, env); + reg->num_mem = env->num_mem; + return r; +} + +extern void +onig_scan_env_set_error_string(ScanEnv* env, int ecode, + UChar* arg, UChar* arg_end) +{ + env->error = arg; + env->error_end = arg_end; +} +/********************************************************************** + + ruby.c - + + $Author: nobu $ + $Date: 2005/05/01 00:13:34 $ + created at: Tue Aug 10 12:47:31 JST 1993 + + Copyright (C) 1993-2003 Yukihiro Matsumoto + Copyright (C) 2000 Network Applied Communication Laboratory, Inc. 
+ Copyright (C) 2000 Information-technology Promotion Agency, Japan + +**********************************************************************/ + +#if defined _WIN32 || defined __CYGWIN__ +#include <windows.h> +#endif +#ifdef _WIN32_WCE +#include <winsock.h> +#include "wince.h" +#endif +#include "ruby.h" +#include "dln.h" +#include "node.h" +#include <stdio.h> +#include <sys/types.h> +#include <ctype.h> + +#ifdef __hpux +#include <sys/pstat.h> +#endif + +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#ifdef HAVE_SYS_PARAM_H +# include <sys/param.h> +#endif +#ifndef MAXPATHLEN +# define MAXPATHLEN 1024 +#endif + +#ifndef HAVE_STRING_H +char *strchr _((const char*,const char)); +char *strrchr _((const char*,const char)); +char *strstr _((const char*,const char*)); +#endif + +#include "util.h" + +#ifndef HAVE_STDLIB_H +char *getenv(); +#endif + +VALUE ruby_debug = Qfalse; +VALUE ruby_verbose = Qfalse; +static int sflag = 0; +static int xflag = 0; +extern int ruby_yydebug; + +char *ruby_inplace_mode = Qfalse; + +static void load_stdin _((void)); +static void load_file _((const char *, int)); +static void forbid_setid _((const char *)); + +static VALUE do_loop = Qfalse, do_print = Qfalse; +static VALUE do_check = Qfalse, do_line = Qfalse; +static VALUE do_split = Qfalse; + +static char *script; + +static int origargc; +static char **origargv; + +static void +usage(name) + const char *name; +{ + /* This message really ought to be max 23 lines. + * Removed -h because the user already knows that option. Others? */ + + static char *usage_msg[] = { +"-0[octal] specify record separator (\\0, if no argument)", +"-a autosplit mode with -n or -p (splits $_ into $F)", +"-c check syntax only", +"-Cdirectory cd to directory, before executing your script", +"-d set debugging flags (set $DEBUG to true)", +"-e 'command' one line of script. Several -e's allowed. 
Omit [programfile]", +"-Fpattern split() pattern for autosplit (-a)", +"-i[extension] edit ARGV files in place (make backup if extension supplied)", +"-Idirectory specify $LOAD_PATH directory (may be used more than once)", +"-Kkcode specifies KANJI (Japanese) code-set", +"-l enable line ending processing", +"-n assume 'while gets(); ... end' loop around your script", +"-p assume loop like -n but print line also like sed", +"-rlibrary require the library, before executing your script", +"-s enable some switch parsing for switches after script name", +"-S look for the script using PATH environment variable", +"-T[level] turn on tainting checks", +"-v print version number, then turn on verbose mode", +"-w turn warnings on for your script", +"-W[level] set warning level; 0=silence, 1=medium, 2=verbose (default)", +"-x[directory] strip off text before #!ruby line and perhaps cd to directory", +"--copyright print the copyright", +"--version print the version", +NULL +}; + char **p = usage_msg; + + printf("Usage: %s [switches] [--] [programfile] [arguments]\n", name); + while (*p) + printf(" %s\n", *p++); +} + +extern VALUE rb_load_path; + +#define STATIC_FILE_LENGTH 255 + +#if defined _WIN32 || defined __CYGWIN__ || defined __DJGPP__ +static char * +rubylib_mangle(s, l) + char *s; + unsigned int l; +{ + static char *newp, *oldp; + static int newl, oldl, notfound; + static char newsub[STATIC_FILE_LENGTH+1]; + + if (!newp && !notfound) { + newp = getenv("RUBYLIB_PREFIX"); + if (newp) { + char *s; + + oldp = newp; + while (*newp && !ISSPACE(*newp) && *newp != ';') { + newp++; oldl++; /* Skip digits. */ + } + while (*newp && (ISSPACE(*newp) || *newp == ';')) { + newp++; /* Skip whitespace. 
*/ + } + newl = strlen(newp); + if (newl == 0 || oldl == 0 || newl > STATIC_FILE_LENGTH) { + rb_fatal("malformed RUBYLIB_PREFIX"); + } + strcpy(newsub, newp); + s = newsub; + while (*s) { + if (*s == '\\') *s = '/'; + s++; + } + } + else { + notfound = 1; + } + } + if (l == 0) { + l = strlen(s); + } + if (!newp || l < oldl || strncasecmp(oldp, s, oldl) != 0) { + static char ret[STATIC_FILE_LENGTH+1]; + strncpy(ret, s, l); + ret[l] = 0; + return ret; + } + if (l + newl - oldl > STATIC_FILE_LENGTH || newl > STATIC_FILE_LENGTH) { + rb_fatal("malformed RUBYLIB_PREFIX"); + } + strcpy(newsub + newl, s + oldl); + newsub[l + newl - oldl] = 0; + return newsub; +} +#define rubylib_mangled_path(s, l) rb_str_new2(rubylib_mangle((s), (l))) +#define rubylib_mangled_path2(s) rb_str_new2(rubylib_mangle((s), 0)) +#else +#define rubylib_mangled_path(s, l) rb_str_new((s), (l)) +#define rubylib_mangled_path2(s) rb_str_new2(s) +#endif + +void +ruby_push_include(path, filter) + const char *path; + VALUE (*filter)_((VALUE)); +{ + const char sep = PATH_SEP_CHAR; + + if (path == 0) return; +#if defined(__CYGWIN__) + { + char rubylib[FILENAME_MAX]; + conv_to_posix_path(path, rubylib, FILENAME_MAX); + path = rubylib; + } +#endif + if (strchr(path, sep)) { + const char *p, *s; + VALUE ary = rb_ary_new(); + + p = path; + while (*p) { + while (*p == sep) p++; + if ((s = strchr(p, sep)) != 0) { + rb_ary_push(ary, (*filter)(rubylib_mangled_path(p, (int)(s-p)))); + p = s + 1; + } + else { + rb_ary_push(ary, (*filter)(rubylib_mangled_path2(p))); + break; + } + } + rb_ary_concat(rb_load_path, ary); + } + else { + rb_ary_push(rb_load_path, (*filter)(rubylib_mangled_path2(path))); + } +} + +static VALUE +identical_path(path) + VALUE path; +{ + return path; +} + +void +ruby_incpush(const char *path) +{ + ruby_push_include(path, identical_path); +} + +static VALUE +expand_include_path(path) + VALUE path; +{ + char *p = RSTRING(path)->ptr; + if (!p) return path; + if (*p == '.' 
&& p[1] == '/') return path; + return rb_file_expand_path(path, Qnil); +} + + +void +ruby_incpush_expand(const char *path) +{ + ruby_push_include(path, expand_include_path); +} + +#if defined DOSISH || defined __CYGWIN__ +#define LOAD_RELATIVE 1 +#endif + +#if defined DOSISH || defined __CYGWIN__ +static inline void translate_char _((char *, int, int)); + +static inline void +translate_char(p, from, to) + char *p; + int from, to; +{ + while (*p) { + if ((unsigned char)*p == from) + *p = to; +#ifdef CharNext /* defined as CharNext[AW] on Windows. */ + p = CharNext(p); +#else + p += mblen(p, RUBY_MBCHAR_MAXSIZE); +#endif + } +} +#endif + +void +ruby_init_loadpath() +{ +#if defined LOAD_RELATIVE + char libpath[MAXPATHLEN+1]; + char *p; + int rest; +#if defined _WIN32 || defined __CYGWIN__ + HMODULE libruby = NULL; + MEMORY_BASIC_INFORMATION m; + +#ifndef _WIN32_WCE + memset(&m, 0, sizeof(m)); + if (VirtualQuery(ruby_init_loadpath, &m, sizeof(m)) && m.State == MEM_COMMIT) + libruby = (HMODULE)m.AllocationBase; +#endif + GetModuleFileName(libruby, libpath, sizeof libpath); +#elif defined(DJGPP) + extern char *__dos_argv0; + strncpy(libpath, __dos_argv0, sizeof(libpath) - 1); +#elif defined(__human68k__) + extern char **_argv; + strncpy(libpath, _argv[0], sizeof(libpath) - 1); +#elif defined(__EMX__) + _execname(libpath, sizeof(libpath) - 1); +#endif + + libpath[sizeof(libpath) - 1] = '\0'; +#if defined DOSISH || defined __CYGWIN__ + translate_char(libpath, '\\', '/'); +#endif + p = strrchr(libpath, '/'); + if (p) { + *p = 0; + if (p-libpath > 3 && !strcasecmp(p-4, "/bin")) { + p -= 4; + *p = 0; + } + } + else { + strcpy(libpath, "."); + p = libpath + 1; + } + + rest = sizeof(libpath) - 1 - (p - libpath); + +#define RUBY_RELATIVE(path) (strncpy(p, (path), rest), libpath) +#else +#define RUBY_RELATIVE(path) (path) +#endif + + if (rb_safe_level() == 0) { + ruby_incpush(getenv("RUBYLIB")); + } + +#ifdef RUBY_SEARCH_PATH + ruby_incpush(RUBY_RELATIVE(RUBY_SEARCH_PATH)); 
+#endif + + ruby_incpush(RUBY_RELATIVE(RUBY_SITE_LIB2)); +#ifdef RUBY_SITE_THIN_ARCHLIB + ruby_incpush(RUBY_RELATIVE(RUBY_SITE_THIN_ARCHLIB)); +#endif + ruby_incpush(RUBY_RELATIVE(RUBY_SITE_ARCHLIB)); + ruby_incpush(RUBY_RELATIVE(RUBY_SITE_LIB)); + + ruby_incpush(RUBY_RELATIVE(RUBY_LIB)); +#ifdef RUBY_THIN_ARCHLIB + ruby_incpush(RUBY_RELATIVE(RUBY_THIN_ARCHLIB)); +#endif + ruby_incpush(RUBY_RELATIVE(RUBY_ARCHLIB)); + + if (rb_safe_level() == 0) { + ruby_incpush("."); + } +} + +struct req_list { + char *name; + struct req_list *next; +}; +static struct req_list req_list_head, *req_list_last = &req_list_head; + +static void +add_modules(mod) + const char *mod; +{ + struct req_list *list; + + list = ALLOC(struct req_list); + list->name = ALLOC_N(char, strlen(mod)+1); + strcpy(list->name, mod); + list->next = 0; + req_list_last->next = list; + req_list_last = list; +} + +extern void Init_ext _((void)); + +static void +require_libraries() +{ + extern NODE *ruby_eval_tree; + NODE *save[3]; + struct req_list *list = req_list_head.next; + struct req_list *tmp; + + save[0] = ruby_eval_tree; + save[1] = NEW_BEGIN(0); + ruby_eval_tree = 0; + ruby_current_node = 0; + Init_ext(); /* should be called here for some reason :-( */ + ruby_current_node = save[1]; + ruby_set_current_source(); + req_list_last = 0; + while (list) { + ruby_current_node = 0; + rb_require(list->name); + tmp = list->next; + free(list->name); + free(list); + list = tmp; + ruby_current_node = save[1]; + ruby_set_current_source(); + } + req_list_head.next = 0; + ruby_eval_tree = save[0]; + rb_gc_force_recycle((VALUE)save[1]); + ruby_current_node = 0; +} + +static void +process_sflag() +{ + if (sflag) { + long n; + VALUE *args; + + n = RARRAY(rb_argv)->len; + args = RARRAY(rb_argv)->ptr; + while (n > 0) { + VALUE v = *args++; + char *s = StringValuePtr(v); + char *p; + int hyphen = Qfalse; + + if (s[0] != '-') break; + n--; + if (s[1] == '-' && s[2] == '\0') break; + + v = Qtrue; + /* check if valid name before 
replacing - with _ */ + for (p = s + 1; *p; p++) { + if (*p == '=') { + *p++ = '\0'; + v = rb_str_new2(p); + break; + } + if (*p == '-') { + hyphen = Qtrue; + } + else if (*p != '_' && !ISALNUM(*p)) { + VALUE name_error[2]; + name_error[0] = rb_str_new2("invalid name for global variable - "); + if (!(p = strchr(p, '='))) { + rb_str_cat2(name_error[0], s); + } + else { + rb_str_cat(name_error[0], s, p - s); + } + name_error[1] = args[-1]; + rb_exc_raise(rb_class_new_instance(2, name_error, rb_eNameError)); + } + } + s[0] = '$'; + if (hyphen) { + for (p = s + 1; *p; ++p) { + if (*p == '-') *p = '_'; + } + } + rb_gv_set(s, v); + } + n = RARRAY(rb_argv)->len - n; + while (n--) { + rb_ary_shift(rb_argv); + } + } + sflag = 0; +} + +static void proc_options _((int argc, char **argv)); + +static char* +moreswitches(s) + char *s; +{ + int argc; char *argv[3]; + char *p = s; + + argc = 2; argv[0] = argv[2] = 0; + while (*s && !ISSPACE(*s)) + s++; + argv[1] = ALLOCA_N(char, s-p+2); + argv[1][0] = '-'; + strncpy(argv[1]+1, p, s-p); + argv[1][s-p+1] = '\0'; + proc_options(argc, argv); + while (*s && ISSPACE(*s)) + s++; + return s; +} + +NODE *ruby_eval_tree; + +static void +proc_options(argc, argv) + int argc; + char **argv; +{ + char *argv0 = argv[0]; + int do_search; + char *s; + NODE *volatile script_node = 0; + + int version = 0; + int copyright = 0; + int verbose = 0; + VALUE e_script = Qfalse; + + if (argc == 0) return; + + do_search = Qfalse; + + for (argc--,argv++; argc > 0; argc--,argv++) { + if (argv[0][0] != '-' || !argv[0][1]) break; + + s = argv[0]+1; + reswitch: + switch (*s) { + case 'a': + do_split = Qtrue; + s++; + goto reswitch; + + case 'p': + do_print = Qtrue; + /* through */ + case 'n': + do_loop = Qtrue; + s++; + goto reswitch; + + case 'd': + ruby_debug = Qtrue; + ruby_verbose = Qtrue; + s++; + goto reswitch; + + case 'y': + ruby_yydebug = 1; + s++; + goto reswitch; + + case 'v': + if (argv0 == 0 || verbose) { + s++; + goto reswitch; + } + 
ruby_show_version(); + verbose = 1; + case 'w': + ruby_verbose = Qtrue; + s++; + goto reswitch; + + case 'W': + { + int numlen; + int v = 2; /* -W as -W2 */ + + if (*++s) { + v = scan_oct(s, 1, &numlen); + if (numlen == 0) v = 1; + s += numlen; + } + switch (v) { + case 0: + ruby_verbose = Qnil; break; + case 1: + ruby_verbose = Qfalse; break; + default: + ruby_verbose = Qtrue; break; + } + } + goto reswitch; + + case 'c': + do_check = Qtrue; + s++; + goto reswitch; + + case 's': + forbid_setid("-s"); + sflag = 1; + s++; + goto reswitch; + + case 'h': + usage(origargv[0]); + exit(0); + + case 'l': + do_line = Qtrue; + rb_output_rs = rb_rs; + s++; + goto reswitch; + + case 'S': + forbid_setid("-S"); + do_search = Qtrue; + s++; + goto reswitch; + + case 'e': + forbid_setid("-e"); + if (!*++s) { + s = argv[1]; + argc--,argv++; + } + if (!s) { + fprintf(stderr, "%s: no code specified for -e\n", origargv[0]); + exit(2); + } + if (!e_script) { + e_script = rb_str_new(0,0); + if (script == 0) script = "-e"; + } + rb_str_cat2(e_script, s); + rb_str_cat2(e_script, "\n"); + break; + + case 'r': + forbid_setid("-r"); + if (*++s) { + add_modules(s); + } + else if (argv[1]) { + add_modules(argv[1]); + argc--,argv++; + } + break; + + case 'i': + forbid_setid("-i"); + if (ruby_inplace_mode) free(ruby_inplace_mode); + ruby_inplace_mode = strdup(s+1); + break; + + case 'x': + xflag = Qtrue; + s++; + if (*s && chdir(s) < 0) { + rb_fatal("Can't chdir to %s", s); + } + break; + + case 'C': + case 'X': + s++; + if (!*s) { + s = argv[1]; + argc--,argv++; + } + if (!s || !*s) { + rb_fatal("Can't chdir"); + } + if (chdir(s) < 0) { + rb_fatal("Can't chdir to %s", s); + } + break; + + case 'F': + if (*++s) { + rb_fs = rb_reg_new(s, strlen(s), 0); + } + break; + + case 'K': + if (*++s) { + rb_set_kcode(s); + s++; + } + goto reswitch; + + case 'T': + { + int numlen; + int v = 1; + + if (*++s) { + v = scan_oct(s, 2, &numlen); + if (numlen == 0) v = 1; + s += numlen; + } + rb_set_safe_level(v); 
+ } + goto reswitch; + + case 'I': + forbid_setid("-I"); + if (*++s) + ruby_incpush_expand(s); + else if (argv[1]) { + ruby_incpush_expand(argv[1]); + argc--,argv++; + } + break; + + case '0': + { + int numlen; + int v; + char c; + + v = scan_oct(s, 4, &numlen); + s += numlen; + if (v > 0377) rb_rs = Qnil; + else if (v == 0 && numlen >= 2) { + rb_rs = rb_str_new2("\n\n"); + } + else { + c = v & 0xff; + rb_rs = rb_str_new(&c, 1); + } + } + goto reswitch; + + case '-': + if (!s[1] || (s[1] == '\r' && !s[2])) { + argc--,argv++; + goto switch_end; + } + s++; + if (strcmp("copyright", s) == 0) + copyright = 1; + else if (strcmp("debug", s) == 0) { + ruby_debug = Qtrue; + ruby_verbose = Qtrue; + } + else if (strcmp("version", s) == 0) + version = 1; + else if (strcmp("verbose", s) == 0) { + verbose = 1; + ruby_verbose = Qtrue; + } + else if (strcmp("yydebug", s) == 0) + ruby_yydebug = 1; + else if (strcmp("help", s) == 0) { + usage(origargv[0]); + exit(0); + } + else { + fprintf(stderr, "%s: invalid option --%s (-h will show valid options)\n", + origargv[0], s); + exit(2); + } + break; + + case '\r': + if (!s[1]) break; + + default: + { + const char *format; + if (ISPRINT(*s)) { + format = "%s: invalid option -%c (-h will show valid options)\n"; + } + else { + format = "%s: invalid option -\\%03o (-h will show valid options)\n"; + } + fprintf(stderr, format, origargv[0], (int)(unsigned char)*s); + } + exit(2); + + case 0: + break; + } + } + + switch_end: + if (argv0 == 0) return; + + if (rb_safe_level() == 0 && (s = getenv("RUBYOPT"))) { + while (ISSPACE(*s)) s++; + if (*s == 'T' || (*s == '-' && *(s+1) == 'T')) { + int numlen; + int v = 1; + + if (*s != 'T') ++s; + if (*++s) { + v = scan_oct(s, 2, &numlen); + if (numlen == 0) v = 1; + } + rb_set_safe_level(v); + } + else { + while (s && *s) { + if (*s == '-') { + s++; + if (ISSPACE(*s)) { + do {s++;} while (ISSPACE(*s)); + continue; + } + } + if (!*s) break; + if (!strchr("IdvwrK", *s)) + rb_raise(rb_eRuntimeError, 
"illegal switch in RUBYOPT: -%c", *s); + s = moreswitches(s); + } + } + } + + if (version) { + ruby_show_version(); + exit(0); + } + if (copyright) { + ruby_show_copyright(); + } + + if (rb_safe_level() >= 4) { + OBJ_TAINT(rb_argv); + OBJ_TAINT(rb_load_path); + } + + if (!e_script) { + if (argc == 0) { /* no more args */ + if (verbose) exit(0); + script = "-"; + } + else { + script = argv[0]; + if (script[0] == '\0') { + script = "-"; + } + else if (do_search) { + char *path = getenv("RUBYPATH"); + + script = 0; + if (path) { + script = dln_find_file(argv[0], path); + } + if (!script) { + script = dln_find_file(argv[0], getenv(PATH_ENV)); + } + if (!script) script = argv[0]; + script = ruby_sourcefile = rb_source_filename(script); + script_node = NEW_BEGIN(0); + } +#if defined DOSISH || defined __CYGWIN__ + translate_char(script, '\\', '/'); +#endif + argc--; argv++; + } + } + + ruby_script(script); + ruby_set_argv(argc, argv); + process_sflag(); + + ruby_init_loadpath(); + ruby_sourcefile = rb_source_filename(argv0); + if (e_script) { + require_libraries(); + ruby_eval_tree = rb_compile_string(script, e_script, 1); + } + else if (strlen(script) == 1 && script[0] == '-') { + load_stdin(); + } + else { + load_file(script, 1); + } + + process_sflag(); + xflag = 0; + + if (rb_safe_level() >= 4) { + FL_UNSET(rb_argv, FL_TAINT); + FL_UNSET(rb_load_path, FL_TAINT); + } +} + +extern int ruby__end__seen; + +static void +load_file(fname, script) + const char *fname; + int script; +{ + extern VALUE rb_stdin; + VALUE f; + int line_start = 1; + + if (!fname) rb_load_fail(fname); + if (strcmp(fname, "-") == 0) { + f = rb_stdin; + } + else { + FILE *fp = fopen(fname, "r"); + + if (fp == NULL) { + rb_load_fail(fname); + } + fclose(fp); + + f = rb_file_open(fname, "r"); +#if defined DOSISH || defined __CYGWIN__ + { + char *ext = strrchr(fname, '.'); + if (ext && strcasecmp(ext, ".exe") == 0) + rb_io_binmode(f); + } +#endif + } + + if (script) { + VALUE c = 1; /* something not nil 
*/ + VALUE line; + char *p; + + if (xflag) { + forbid_setid("-x"); + xflag = Qfalse; + while (!NIL_P(line = rb_io_gets(f))) { + line_start++; + if (RSTRING(line)->len > 2 + && RSTRING(line)->ptr[0] == '#' + && RSTRING(line)->ptr[1] == '!') { + if ((p = strstr(RSTRING(line)->ptr, "ruby")) != 0) { + goto start_read; + } + } + } + rb_raise(rb_eLoadError, "no Ruby script found in input"); + } + + c = rb_io_getc(f); + if (c == INT2FIX('#')) { + line = rb_io_gets(f); + if (NIL_P(line)) return; + line_start++; + + if (RSTRING(line)->len > 2 && RSTRING(line)->ptr[0] == '!') { + if ((p = strstr(RSTRING(line)->ptr, "ruby")) == 0) { + /* not ruby script, kick the program */ + char **argv; + char *path; + char *pend = RSTRING(line)->ptr + RSTRING(line)->len; + + p = RSTRING(line)->ptr + 1; /* skip `#!' */ + if (pend[-1] == '\n') pend--; /* chomp line */ + if (pend[-1] == '\r') pend--; + *pend = '\0'; + while (p < pend && ISSPACE(*p)) + p++; + path = p; /* interpreter path */ + while (p < pend && !ISSPACE(*p)) + p++; + *p++ = '\0'; + if (p < pend) { + argv = ALLOCA_N(char*, origargc+3); + argv[1] = p; + MEMCPY(argv+2, origargv+1, char*, origargc); + } + else { + argv = origargv; + } + argv[0] = path; + execv(path, argv); + + ruby_sourcefile = rb_source_filename(fname); + ruby_sourceline = 1; + rb_fatal("Can't exec %s", path); + } + + start_read: + p += 4; + RSTRING(line)->ptr[RSTRING(line)->len-1] = '\0'; + if (RSTRING(line)->ptr[RSTRING(line)->len-2] == '\r') + RSTRING(line)->ptr[RSTRING(line)->len-2] = '\0'; + if ((p = strstr(p, " -")) != 0) { + p++; /* skip space before `-' */ + while (*p == '-') { + p = moreswitches(p+1); + } + } + } + } + else if (!NIL_P(c)) { + rb_io_ungetc(f, c); + } + require_libraries(); /* Why here? 
unnatural */ + if (NIL_P(c)) return; + } + ruby_eval_tree = rb_compile_file(fname, f, line_start); + if (script && ruby__end__seen) { + rb_define_global_const("DATA", f); + } + else if (f != rb_stdin) { + rb_io_close(f); + } +} + +void +rb_load_file(fname) + const char *fname; +{ + load_file(fname, 0); +} + +static void +load_stdin() +{ + forbid_setid("program input from stdin"); + load_file("-", 1); +} + +VALUE rb_progname; +VALUE rb_argv; +VALUE rb_argv0; + +#if !defined(PSTAT_SETCMD) && !defined(HAVE_SETPROCTITLE) && !defined(DOSISH) +static struct { + char *begin, *end; +} envspace; +extern char **environ; + +static void +set_arg0space() +{ + char *s; + int i; + + if (!environ || (s = environ[0]) == NULL) return; + envspace.begin = s; + s += strlen(s); + for (i = 1; environ[i]; i++) { + if (environ[i] == s + 1) { + s++; + s += strlen(s); /* this one is ok too */ + } + } + envspace.end = s; +} +#else +#define set_arg0space() ((void)0) +#endif + +static void +set_arg0(val, id) + VALUE val; + ID id; +{ + char *s; + long i; + static int len; + + if (origargv == 0) rb_raise(rb_eRuntimeError, "$0 not initialized"); + StringValue(val); + s = RSTRING(val)->ptr; + i = RSTRING(val)->len; +#if defined(PSTAT_SETCMD) + if (i >= PST_CLEN) { + union pstun j; + j.pst_command = s; + i = PST_CLEN; + RSTRING(val)->len = i; + *(s + i) = '\0'; + pstat(PSTAT_SETCMD, j, PST_CLEN, 0, 0); + } + else { + union pstun j; + j.pst_command = s; + pstat(PSTAT_SETCMD, j, i, 0, 0); + } + rb_progname = rb_tainted_str_new(s, i); +#elif defined(HAVE_SETPROCTITLE) + setproctitle("%.*s", (int)i, s); + rb_progname = rb_tainted_str_new(s, i); +#else + if (len == 0) { + char *s = origargv[0]; + int i; + + s += strlen(s); + /* See if all the arguments are contiguous in memory */ + for (i = 1; i < origargc; i++) { + if (origargv[i] == s + 1) { + s++; + s += strlen(s); /* this one is ok too */ + } + else { + break; + } + } +#ifndef DOSISH + if (s + 1 == envspace.begin) { + s = envspace.end; + 
ruby_setenv("", NULL); /* duplicate environ vars */ + } +#endif + len = s - origargv[0]; + } + + if (i >= len) { + i = len; + memcpy(origargv[0], s, i); + origargv[0][i] = '\0'; + } + else { + memcpy(origargv[0], s, i); + s = origargv[0]+i; + *s++ = '\0'; + while (++i < len) + *s++ = ' '; + for (i = 1; i < origargc; i++) + origargv[i] = 0; + } + rb_progname = rb_tainted_str_new2(origargv[0]); +#endif +} + +void +ruby_script(name) + const char *name; +{ + if (name) { + rb_progname = rb_tainted_str_new2(name); + ruby_sourcefile = rb_source_filename(name); + } +} + +static int uid, euid, gid, egid; + +static void +init_ids() +{ + uid = (int)getuid(); + euid = (int)geteuid(); + gid = (int)getgid(); + egid = (int)getegid(); +#ifdef VMS + uid |= gid << 16; + euid |= egid << 16; +#endif + if (uid && (euid != uid || egid != gid)) { + rb_set_safe_level(1); + } +} + +static void +forbid_setid(s) + const char *s; +{ + if (euid != uid) + rb_raise(rb_eSecurityError, "no %s allowed while running setuid", s); + if (egid != gid) + rb_raise(rb_eSecurityError, "no %s allowed while running setgid", s); + if (rb_safe_level() > 0) + rb_raise(rb_eSecurityError, "no %s allowed in tainted mode", s); +} + +static void +verbose_setter(val, id, variable) + VALUE val; + ID id; + VALUE *variable; +{ + ruby_verbose = RTEST(val) ? 
Qtrue : val; +} + +static VALUE +opt_W_getter(val, id) + VALUE val; + ID id; +{ + if (ruby_verbose == Qnil) return INT2FIX(0); + if (ruby_verbose == Qfalse) return INT2FIX(1); + if (ruby_verbose == Qtrue) return INT2FIX(2); + return Qnil; /* not reached */ +} + +void +ruby_prog_init() +{ + init_ids(); + + ruby_sourcefile = rb_source_filename("ruby"); + rb_define_hooked_variable("$VERBOSE", &ruby_verbose, 0, verbose_setter); + rb_define_hooked_variable("$-v", &ruby_verbose, 0, verbose_setter); + rb_define_hooked_variable("$-w", &ruby_verbose, 0, verbose_setter); + rb_define_virtual_variable("$-W", opt_W_getter, 0); + rb_define_variable("$DEBUG", &ruby_debug); + rb_define_variable("$-d", &ruby_debug); + rb_define_readonly_variable("$-p", &do_print); + rb_define_readonly_variable("$-l", &do_line); + + rb_define_hooked_variable("$0", &rb_progname, 0, set_arg0); + rb_define_hooked_variable("$PROGRAM_NAME", &rb_progname, 0, set_arg0); + + rb_argv = rb_ary_new(); + rb_define_readonly_variable("$*", &rb_argv); + rb_define_global_const("ARGV", rb_argv); + rb_define_readonly_variable("$-a", &do_split); + rb_global_variable(&rb_argv0); + +#ifdef MSDOS + /* + * There is no way we can refer to them from ruby, so close them to save + * space. 
+ */ + (void)fclose(stdaux); + (void)fclose(stdprn); +#endif +} + +void +ruby_set_argv(argc, argv) + int argc; + char **argv; +{ + int i; + +#if defined(USE_DLN_A_OUT) + if (origargv) dln_argv0 = origargv[0]; + else dln_argv0 = argv[0]; +#endif + rb_ary_clear(rb_argv); + for (i=0; i < argc; i++) { + VALUE arg = rb_tainted_str_new2(argv[i]); + + OBJ_FREEZE(arg); + rb_ary_push(rb_argv, arg); + } +} + +NODE *rb_parser_append_print _((NODE*)); +NODE *rb_parser_while_loop _((NODE*, int, int)); + +void +ruby_process_options(argc, argv) + int argc; + char **argv; +{ + origargc = argc; origargv = argv; + + ruby_script(argv[0]); /* for the time being */ + rb_argv0 = rb_progname; +#if defined(USE_DLN_A_OUT) + dln_argv0 = argv[0]; +#endif + set_arg0space(); + proc_options(argc, argv); + + if (do_check && ruby_nerrs == 0) { + printf("Syntax OK\n"); + exit(0); + } + if (do_print) { + ruby_eval_tree = rb_parser_append_print(ruby_eval_tree); + } + if (do_loop) { + ruby_eval_tree = rb_parser_while_loop(ruby_eval_tree, do_line, do_split); + } +} +/********************************************************************** + + signal.c - + + $Author: matz $ + $Date: 2005/03/04 06:47:41 $ + created at: Tue Dec 20 10:13:44 JST 1994 + + Copyright (C) 1993-2003 Yukihiro Matsumoto + Copyright (C) 2000 Network Applied Communication Laboratory, Inc. 
+ Copyright (C) 2000 Information-technology Promotion Agency, Japan + +**********************************************************************/ + +#include "ruby.h" +#include "rubysig.h" +#include <signal.h> +#include <stdio.h> + +#ifdef __BEOS__ +#undef SIGBUS +#endif + +#ifndef NSIG +# ifdef DJGPP +# define NSIG SIGMAX +# else +# define NSIG (_SIGMAX + 1) /* For QNX */ +# endif +#endif + +static struct signals { + char *signm; + int signo; +} siglist [] = { +#ifdef SIGHUP + {"HUP", SIGHUP}, +#endif + {"INT", SIGINT}, +#ifdef SIGQUIT + {"QUIT", SIGQUIT}, +#endif +#ifdef SIGILL + {"ILL", SIGILL}, +#endif +#ifdef SIGTRAP + {"TRAP", SIGTRAP}, +#endif +#ifdef SIGIOT + {"IOT", SIGIOT}, +#endif +#ifdef SIGABRT + {"ABRT", SIGABRT}, +#endif +#ifdef SIGEMT + {"EMT", SIGEMT}, +#endif +#ifdef SIGFPE + {"FPE", SIGFPE}, +#endif +#ifdef SIGKILL + {"KILL", SIGKILL}, +#endif +#ifdef SIGBUS + {"BUS", SIGBUS}, +#endif +#ifdef SIGSEGV + {"SEGV", SIGSEGV}, +#endif +#ifdef SIGSYS + {"SYS", SIGSYS}, +#endif +#ifdef SIGPIPE + {"PIPE", SIGPIPE}, +#endif +#ifdef SIGALRM + {"ALRM", SIGALRM}, +#endif +#ifdef SIGTERM + {"TERM", SIGTERM}, +#endif +#ifdef SIGURG + {"URG", SIGURG}, +#endif +#ifdef SIGSTOP + {"STOP", SIGSTOP}, +#endif +#ifdef SIGTSTP + {"TSTP", SIGTSTP}, +#endif +#ifdef SIGCONT + {"CONT", SIGCONT}, +#endif +#ifdef SIGCHLD + {"CHLD", SIGCHLD}, +#endif +#ifdef SIGCLD + {"CLD", SIGCLD}, +#else +# ifdef SIGCHLD + {"CLD", SIGCHLD}, +# endif +#endif +#ifdef SIGTTIN + {"TTIN", SIGTTIN}, +#endif +#ifdef SIGTTOU + {"TTOU", SIGTTOU}, +#endif +#ifdef SIGIO + {"IO", SIGIO}, +#endif +#ifdef SIGXCPU + {"XCPU", SIGXCPU}, +#endif +#ifdef SIGXFSZ + {"XFSZ", SIGXFSZ}, +#endif +#ifdef SIGVTALRM + {"VTALRM", SIGVTALRM}, +#endif +#ifdef SIGPROF + {"PROF", SIGPROF}, +#endif +#ifdef SIGWINCH + {"WINCH", SIGWINCH}, +#endif +#ifdef SIGUSR1 + {"USR1", SIGUSR1}, +#endif +#ifdef SIGUSR2 + {"USR2", SIGUSR2}, +#endif +#ifdef SIGLOST + {"LOST", SIGLOST}, +#endif +#ifdef SIGMSG + {"MSG", SIGMSG}, +#endif 
+#ifdef SIGPWR + {"PWR", SIGPWR}, +#endif +#ifdef SIGPOLL + {"POLL", SIGPOLL}, +#endif +#ifdef SIGDANGER + {"DANGER", SIGDANGER}, +#endif +#ifdef SIGMIGRATE + {"MIGRATE", SIGMIGRATE}, +#endif +#ifdef SIGPRE + {"PRE", SIGPRE}, +#endif +#ifdef SIGGRANT + {"GRANT", SIGGRANT}, +#endif +#ifdef SIGRETRACT + {"RETRACT", SIGRETRACT}, +#endif +#ifdef SIGSOUND + {"SOUND", SIGSOUND}, +#endif +#ifdef SIGINFO + {"INFO", SIGINFO}, +#endif + {NULL, 0} +}; + +static int +signm2signo(nm) + char *nm; +{ + struct signals *sigs; + + for (sigs = siglist; sigs->signm; sigs++) + if (strcmp(sigs->signm, nm) == 0) + return sigs->signo; + return 0; +} + +static char* +signo2signm(no) + int no; +{ + struct signals *sigs; + + for (sigs = siglist; sigs->signm; sigs++) + if (sigs->signo == no) + return sigs->signm; + return 0; +} + +const char * +ruby_signal_name(no) + int no; +{ + return signo2signm(no); +} + +/* + * call-seq: + * Process.kill(signal, pid, ...) => fixnum + * + * Sends the given signal to the specified process id(s), or to the + * current process if _pid_ is zero. _signal_ may be an + * integer signal number or a POSIX signal name (either with or without + * a +SIG+ prefix). If _signal_ is negative (or starts + * with a minus sign), kills process groups instead of + * processes. Not all signals are available on all platforms. + * + * pid = fork do + * Signal.trap("HUP") { puts "Ouch!"; exit } + * # ... do some work ... + * end + * # ... + * Process.kill("HUP", pid) + * Process.wait + * + * <em>produces:</em> + * + * Ouch! 
+ */ + +VALUE +rb_f_kill(argc, argv) + int argc; + VALUE *argv; +{ + int negative = 0; + int sig; + int i; + char *s; + + rb_secure(2); + if (argc < 2) + rb_raise(rb_eArgError, "wrong number of arguments -- kill(sig, pid...)"); + switch (TYPE(argv[0])) { + case T_FIXNUM: + sig = FIX2INT(argv[0]); + break; + + case T_SYMBOL: + s = rb_id2name(SYM2ID(argv[0])); + if (!s) rb_raise(rb_eArgError, "bad signal"); + goto str_signal; + + case T_STRING: + s = RSTRING(argv[0])->ptr; + if (s[0] == '-') { + negative++; + s++; + } + str_signal: + if (strncmp("SIG", s, 3) == 0) + s += 3; + if((sig = signm2signo(s)) == 0) + rb_raise(rb_eArgError, "unsupported name `SIG%s'", s); + + if (negative) + sig = -sig; + break; + + default: + { + VALUE str; + + str = rb_check_string_type(argv[0]); + if (!NIL_P(str)) { + s = RSTRING(str)->ptr; + goto str_signal; + } + rb_raise(rb_eArgError, "bad signal type %s", + rb_obj_classname(argv[0])); + } + break; + } + + if (sig < 0) { + sig = -sig; + for (i=1; i<argc; i++) { + int pid = NUM2INT(argv[i]); +#ifdef HAS_KILLPG + if (killpg(pid, sig) < 0) +#else + if (kill(-pid, sig) < 0) +#endif + rb_sys_fail(0); + } + } + else { + for (i=1; i<argc; i++) { + Check_Type(argv[i], T_FIXNUM); + if (kill(FIX2INT(argv[i]), sig) < 0) + rb_sys_fail(0); + } + } + return INT2FIX(i-1); +} + +static struct { + VALUE cmd; + int safe; +} trap_list[NSIG]; +static rb_atomic_t trap_pending_list[NSIG]; +static char rb_trap_accept_nativethreads[NSIG]; +rb_atomic_t rb_trap_pending; +rb_atomic_t rb_trap_immediate; +int rb_prohibit_interrupt = 1; + +void +rb_gc_mark_trap_list() +{ +#ifndef MACOS_UNUSE_SIGNAL + int i; + + for (i=0; i<NSIG; i++) { + if (trap_list[i].cmd) + rb_gc_mark(trap_list[i].cmd); + } +#endif /* MACOS_UNUSE_SIGNAL */ +} + +#ifdef __dietlibc__ +#define sighandler_t sh_t +#endif + +typedef RETSIGTYPE (*sighandler_t)_((int)); + +#ifdef POSIX_SIGNAL +static sighandler_t +ruby_signal(signum, handler) + int signum; + sighandler_t handler; +{ + struct sigaction 
sigact, old; + + rb_trap_accept_nativethreads[signum] = 0; + + sigact.sa_handler = handler; + sigemptyset(&sigact.sa_mask); + sigact.sa_flags = 0; +#if defined(SA_RESTART) + /* All other signals but VTALRM shall restart restartable syscall + VTALRM will cause EINTR to syscall if interrupted. + */ + if (signum != SIGVTALRM) { + sigact.sa_flags |= SA_RESTART; /* SVR4, 4.3+BSD */ + } +#endif +#ifdef SA_NOCLDWAIT + if (signum == SIGCHLD && handler == SIG_IGN) + sigact.sa_flags |= SA_NOCLDWAIT; +#endif + sigaction(signum, &sigact, &old); + return old.sa_handler; +} + +void +posix_signal(signum, handler) + int signum; + sighandler_t handler; +{ + ruby_signal(signum, handler); +} + +#ifdef HAVE_NATIVETHREAD +static sighandler_t +ruby_nativethread_signal(signum, handler) + int signum; + sighandler_t handler; +{ + sighandler_t old; + + old = ruby_signal(signum, handler); + rb_trap_accept_nativethreads[signum] = 1; + return old; +} + +void +posix_nativethread_signal(signum, handler) + int signum; + sighandler_t handler; +{ + ruby_nativethread_signal(signum, handler); +} +#endif +#else /* !POSIX_SIGNAL */ +#define ruby_signal(sig,handler) (rb_trap_accept_nativethreads[sig] = 0, signal((sig),(handler))) +#ifdef HAVE_NATIVETHREAD +static sighandler_t +ruby_nativethread_signal(signum, handler) + int signum; + sighandler_t handler; +{ + sighandler_t old; + + old = signal(signum, handler); + rb_trap_accept_nativethreads[signum] = 1; + return old; +} +#endif +#endif + +static void signal_exec _((int sig)); +static void +signal_exec(sig) + int sig; +{ + if (trap_list[sig].cmd == 0) { + switch (sig) { + case SIGINT: + rb_thread_interrupt(); + break; +#ifdef SIGHUP + case SIGHUP: +#endif +#ifdef SIGQUIT + case SIGQUIT: +#endif +#ifdef SIGALRM + case SIGALRM: +#endif +#ifdef SIGUSR1 + case SIGUSR1: +#endif +#ifdef SIGUSR2 + case SIGUSR2: +#endif + rb_thread_signal_raise(signo2signm(sig)); + break; + } + } + else { + rb_thread_trap_eval(trap_list[sig].cmd, sig, trap_list[sig].safe); + } 
+} + +static void +sigsend_to_ruby_thread(sig) + int sig; +{ +#ifdef HAVE_NATIVETHREAD_KILL +# ifdef HAVE_SIGPROCMASK + sigset_t mask, old_mask; +# else + int mask, old_mask; +# endif + +#ifdef HAVE_SIGPROCMASK + sigfillset(&mask); + sigprocmask(SIG_BLOCK, &mask, &old_mask); +#else + mask = sigblock(~0); + sigsetmask(mask); +#endif + + ruby_native_thread_kill(sig); +#endif +} + +static RETSIGTYPE sighandler _((int)); +static RETSIGTYPE +sighandler(sig) + int sig; +{ +#ifdef _WIN32 +#define IN_MAIN_CONTEXT(f, a) (rb_w32_main_context(a, f) ? (void)0 : f(a)) +#else +#define IN_MAIN_CONTEXT(f, a) f(a) +#endif + if (sig >= NSIG) { + rb_bug("trap_handler: Bad signal %d", sig); + } + +#if defined(HAVE_NATIVETHREAD) && defined(HAVE_NATIVETHREAD_KILL) + if (!is_ruby_native_thread() && !rb_trap_accept_nativethreads[sig]) { + sigsend_to_ruby_thread(sig); + return; + } +#endif + +#if !defined(BSD_SIGNAL) && !defined(POSIX_SIGNAL) + if (rb_trap_accept_nativethreads[sig]) { + ruby_nativethread_signal(sig, sighandler); + } else { + ruby_signal(sig, sighandler); + } +#endif + + if (trap_list[sig].cmd == 0 && ATOMIC_TEST(rb_trap_immediate)) { + IN_MAIN_CONTEXT(signal_exec, sig); + ATOMIC_SET(rb_trap_immediate, 1); + } + else { + ATOMIC_INC(rb_trap_pending); + ATOMIC_INC(trap_pending_list[sig]); + } +} + +#ifdef SIGBUS +static RETSIGTYPE sigbus _((int)); +static RETSIGTYPE +sigbus(sig) + int sig; +{ +#if defined(HAVE_NATIVETHREAD) && defined(HAVE_NATIVETHREAD_KILL) + if (!is_ruby_native_thread() && !rb_trap_accept_nativethreads[sig]) { + sigsend_to_ruby_thread(sig); + return; + } +#endif + + rb_bug("Bus Error"); +} +#endif + +#ifdef SIGSEGV +static RETSIGTYPE sigsegv _((int)); +static RETSIGTYPE +sigsegv(sig) + int sig; +{ +#if defined(HAVE_NATIVETHREAD) && defined(HAVE_NATIVETHREAD_KILL) + if (!is_ruby_native_thread() && !rb_trap_accept_nativethreads[sig]) { + sigsend_to_ruby_thread(sig); + return; + } +#endif + + rb_bug("Segmentation fault"); +} +#endif + +#ifdef SIGPIPE +static 
RETSIGTYPE sigpipe _((int)); +static RETSIGTYPE +sigpipe(sig) + int sig; +{ + /* do nothing */ +} +#endif + +void +rb_trap_exit() +{ +#ifndef MACOS_UNUSE_SIGNAL + if (trap_list[0].cmd) { + VALUE trap_exit = trap_list[0].cmd; + + trap_list[0].cmd = 0; + rb_eval_cmd(trap_exit, rb_ary_new3(1, INT2FIX(0)), trap_list[0].safe); + } +#endif +} + +void +rb_trap_exec() +{ +#ifndef MACOS_UNUSE_SIGNAL + int i; + + for (i=0; i<NSIG; i++) { + if (trap_pending_list[i]) { + trap_pending_list[i] = 0; + signal_exec(i); + } + } +#endif /* MACOS_UNUSE_SIGNAL */ + rb_trap_pending = 0; +} + +struct trap_arg { +#ifndef _WIN32 +# ifdef HAVE_SIGPROCMASK + sigset_t mask; +# else + int mask; +# endif +#endif + VALUE sig, cmd; +}; + +# ifdef HAVE_SIGPROCMASK +static sigset_t trap_last_mask; +# else +static int trap_last_mask; +# endif + +static RETSIGTYPE sigexit _((int)); +static RETSIGTYPE +sigexit(sig) + int sig; +{ +#if defined(HAVE_NATIVETHREAD) && defined(HAVE_NATIVETHREAD_KILL) + if (!is_ruby_native_thread() && !rb_trap_accept_nativethreads[sig]) { + sigsend_to_ruby_thread(sig); + return; + } +#endif + + rb_exit(0); +} + +static VALUE +trap(arg) + struct trap_arg *arg; +{ + sighandler_t func, oldfunc; + VALUE command, oldcmd; + int sig = -1; + char *s; + + func = sighandler; + command = arg->cmd; + if (NIL_P(command)) { + func = SIG_IGN; + } + else if (TYPE(command) == T_STRING) { + SafeStringValue(command); /* taint check */ + if (RSTRING(command)->len == 0) { + func = SIG_IGN; + } + else if (RSTRING(command)->len == 7) { + if (strncmp(RSTRING(command)->ptr, "SIG_IGN", 7) == 0) { + func = SIG_IGN; + } + else if (strncmp(RSTRING(command)->ptr, "SIG_DFL", 7) == 0) { + func = SIG_DFL; + } + else if (strncmp(RSTRING(command)->ptr, "DEFAULT", 7) == 0) { + func = SIG_DFL; + } + } + else if (RSTRING(command)->len == 6) { + if (strncmp(RSTRING(command)->ptr, "IGNORE", 6) == 0) { + func = SIG_IGN; + } + } + else if (RSTRING(command)->len == 4) { + if (strncmp(RSTRING(command)->ptr, "EXIT", 4) 
== 0) { + func = sigexit; + } + } + } + if (func == SIG_IGN || func == SIG_DFL) { + command = 0; + } + + switch (TYPE(arg->sig)) { + case T_FIXNUM: + sig = FIX2INT(arg->sig); + break; + + case T_SYMBOL: + s = rb_id2name(SYM2ID(arg->sig)); + if (!s) rb_raise(rb_eArgError, "bad signal"); + goto str_signal; + + case T_STRING: + s = RSTRING(arg->sig)->ptr; + + str_signal: + if (strncmp("SIG", s, 3) == 0) + s += 3; + sig = signm2signo(s); + if (sig == 0 && strcmp(s, "EXIT") != 0) + rb_raise(rb_eArgError, "unsupported signal SIG%s", s); + } + + if (sig < 0 || sig > NSIG) { + rb_raise(rb_eArgError, "invalid signal number (%d)", sig); + } +#if defined(HAVE_SETITIMER) + if (sig == SIGVTALRM) { + rb_raise(rb_eArgError, "SIGVTALRM reserved for Thread; can't set handler"); + } +#endif + if (func == SIG_DFL) { + switch (sig) { + case SIGINT: +#ifdef SIGHUP + case SIGHUP: +#endif +#ifdef SIGQUIT + case SIGQUIT: +#endif +#ifdef SIGALRM + case SIGALRM: +#endif +#ifdef SIGUSR1 + case SIGUSR1: +#endif +#ifdef SIGUSR2 + case SIGUSR2: +#endif + func = sighandler; + break; +#ifdef SIGBUS + case SIGBUS: + func = sigbus; + break; +#endif +#ifdef SIGSEGV + case SIGSEGV: + func = sigsegv; + break; +#endif +#ifdef SIGPIPE + case SIGPIPE: + func = sigpipe; + break; +#endif + } + } + oldfunc = ruby_signal(sig, func); + oldcmd = trap_list[sig].cmd; + if (!oldcmd) { + if (oldfunc == SIG_IGN) oldcmd = rb_str_new2("IGNORE"); + else if (oldfunc == sighandler) oldcmd = rb_str_new2("DEFAULT"); + else oldcmd = Qnil; + } + + trap_list[sig].cmd = command; + trap_list[sig].safe = ruby_safe_level; + /* enable at least specified signal. 
*/ +#ifndef _WIN32 +#ifdef HAVE_SIGPROCMASK + sigdelset(&arg->mask, sig); +#else + arg->mask &= ~sigmask(sig); +#endif +#endif + return oldcmd; +} + +#ifndef _WIN32 +static VALUE +trap_ensure(arg) + struct trap_arg *arg; +{ + /* enable interrupt */ +#ifdef HAVE_SIGPROCMASK + sigprocmask(SIG_SETMASK, &arg->mask, NULL); +#else + sigsetmask(arg->mask); +#endif + trap_last_mask = arg->mask; + return 0; +} +#endif + +void +rb_trap_restore_mask() +{ +#ifndef _WIN32 +# ifdef HAVE_SIGPROCMASK + sigprocmask(SIG_SETMASK, &trap_last_mask, NULL); +# else + sigsetmask(trap_last_mask); +# endif +#endif +} + +/* + * call-seq: + * Signal.trap( signal, proc ) => obj + * Signal.trap( signal ) {| | block } => obj + * + * Specifies the handling of signals. The first parameter is a signal + * name (a string such as ``SIGALRM'', ``SIGUSR1'', and so on) or a + * signal number. The characters ``SIG'' may be omitted from the + * signal name. The command or block specifies code to be run when the + * signal is raised. If the command is the string ``IGNORE'' or + * ``SIG_IGN'', the signal will be ignored. If the command is + * ``DEFAULT'' or ``SIG_DFL'', the operating system's default handler + * will be invoked. If the command is ``EXIT'', the script will be + * terminated by the signal. Otherwise, the given command or block + * will be run. + * The special signal name ``EXIT'' or signal number zero will be + * invoked just prior to program termination. + * trap returns the previous handler for the given signal. 
+ * + * Signal.trap(0, proc { puts "Terminating: #{$$}" }) + * Signal.trap("CLD") { puts "Child died" } + * fork && Process.wait + * + * produces: + * Terminating: 27461 + * Child died + * Terminating: 27460 + */ +static VALUE +sig_trap(argc, argv) + int argc; + VALUE *argv; +{ + struct trap_arg arg; + + rb_secure(2); + if (argc == 0 || argc > 2) { + rb_raise(rb_eArgError, "wrong number of arguments -- trap(sig, cmd)/trap(sig){...}"); + } + + arg.sig = argv[0]; + if (argc == 1) { + arg.cmd = rb_block_proc(); + } + else if (argc == 2) { + arg.cmd = argv[1]; + } + + if (OBJ_TAINTED(arg.cmd)) { + rb_raise(rb_eSecurityError, "Insecure: tainted signal trap"); + } +#ifndef _WIN32 + /* disable interrupt */ +# ifdef HAVE_SIGPROCMASK + sigfillset(&arg.mask); + sigprocmask(SIG_BLOCK, &arg.mask, &arg.mask); +# else + arg.mask = sigblock(~0); +# endif + + return rb_ensure(trap, (VALUE)&arg, trap_ensure, (VALUE)&arg); +#else + return trap(&arg); +#endif +} + +/* + * call-seq: + * Signal.list => a_hash + * + * Returns a list of signal names mapped to the corresponding + * underlying signal numbers. 
+ * + * Signal.list #=> {"ABRT"=>6, "ALRM"=>14, "BUS"=>7, "CHLD"=>17, "CLD"=>17, "CONT"=>18, "FPE"=>8, "HUP"=>1, "ILL"=>4, "INT"=>2, "IO"=>29, "IOT"=>6, "KILL"=>9, "PIPE"=>13, "POLL"=>29, "PROF"=>27, "PWR"=>30, "QUIT"=>3, "SEGV"=>11, "STOP"=>19, "SYS"=>31, "TERM"=>15, "TRAP"=>5, "TSTP"=>20, "TTIN"=>21, "TTOU"=>22, "URG"=>23, "USR1"=>10, "USR2"=>12, "VTALRM"=>26, "WINCH"=>28, "XCPU"=>24, "XFSZ"=>25} + */ +static VALUE +sig_list() +{ + VALUE h = rb_hash_new(); + struct signals *sigs; + + for (sigs = siglist; sigs->signm; sigs++) { + rb_hash_aset(h, rb_str_new2(sigs->signm), INT2FIX(sigs->signo)); + } + return h; +} + +static void +install_sighandler(signum, handler) + int signum; + sighandler_t handler; +{ + sighandler_t old; + + old = ruby_signal(signum, handler); + if (old != SIG_DFL) { + ruby_signal(signum, old); + } +} + +#ifdef HAVE_NATIVETHREAD +static void +install_nativethread_sighandler(signum, handler) + int signum; + sighandler_t handler; +{ + sighandler_t old; + int old_st; + + old_st = rb_trap_accept_nativethreads[signum]; + old = ruby_nativethread_signal(signum, handler); + if (old != SIG_DFL) { + if (old_st) { + ruby_nativethread_signal(signum, old); + } else { + ruby_signal(signum, old); + } + } +} +#endif + +static void +init_sigchld(sig) + int sig; +{ + sighandler_t oldfunc; +#ifndef _WIN32 +# ifdef HAVE_SIGPROCMASK + sigset_t mask; +# else + int mask; +# endif +#endif + +#ifndef _WIN32 + /* disable interrupt */ +# ifdef HAVE_SIGPROCMASK + sigfillset(&mask); + sigprocmask(SIG_BLOCK, &mask, &mask); +# else + mask = sigblock(~0); +# endif +#endif + + oldfunc = ruby_signal(sig, SIG_DFL); + if (oldfunc != SIG_DFL && oldfunc != SIG_IGN) { + ruby_signal(sig, oldfunc); + } else { + trap_list[sig].cmd = 0; + } + +#ifndef _WIN32 +#ifdef HAVE_SIGPROCMASK + sigdelset(&mask, sig); + sigprocmask(SIG_SETMASK, &mask, NULL); +#else + mask &= ~sigmask(sig); + sigsetmask(mask); +#endif + trap_last_mask = mask; +#endif +} + +/* + * Many operating systems allow signals 
to be sent to running + * processes. Some signals have a defined effect on the process, while + * others may be trapped at the code level and acted upon. For + * example, your process may trap the USR1 signal and use it to toggle + * debugging, and may use TERM to initiate a controlled shutdown. + * + * pid = fork do + * Signal.trap("USR1") do + * $debug = !$debug + * puts "Debug now: #$debug" + * end + * Signal.trap("TERM") do + * puts "Terminating..." + * shutdown() + * end + * # . . . do some work . . . + * end + * + * Process.detach(pid) + * + * # Controlling program: + * Process.kill("USR1", pid) + * # ... + * Process.kill("USR1", pid) + * # ... + * Process.kill("TERM", pid) + * + * produces: + * Debug now: true + * Debug now: false + * Terminating... + * + * The list of available signal names and their interpretation is + * system dependent. Signal delivery semantics may also vary between + * systems; in particular signal delivery may not always be reliable. + */ +void +Init_signal() +{ +#ifndef MACOS_UNUSE_SIGNAL + VALUE mSignal = rb_define_module("Signal"); + + rb_define_global_function("trap", sig_trap, -1); + rb_define_module_function(mSignal, "trap", sig_trap, -1); + rb_define_module_function(mSignal, "list", sig_list, 0); + + install_sighandler(SIGINT, sighandler); +#ifdef SIGHUP + install_sighandler(SIGHUP, sighandler); +#endif +#ifdef SIGQUIT + install_sighandler(SIGQUIT, sighandler); +#endif +#ifdef SIGALRM + install_sighandler(SIGALRM, sighandler); +#endif +#ifdef SIGUSR1 + install_sighandler(SIGUSR1, sighandler); +#endif +#ifdef SIGUSR2 + install_sighandler(SIGUSR2, sighandler); +#endif + +#ifdef SIGBUS + install_sighandler(SIGBUS, sigbus); +#endif +#ifdef SIGSEGV + install_sighandler(SIGSEGV, sigsegv); +#endif +#ifdef SIGPIPE + install_sighandler(SIGPIPE, sigpipe); +#endif + +#ifdef SIGCLD + init_sigchld(SIGCLD); +#endif +#ifdef SIGCHLD + init_sigchld(SIGCHLD); +#endif + +#endif /* MACOS_UNUSE_SIGNAL */ +} 
+/********************************************************************** + sjis.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regenc.h" + +static int EncLen_SJIS[] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1 +}; + +static const char SJIS_CAN_BE_TRAIL_TABLE[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0 +}; + +#define SJIS_ISMB_FIRST(byte) (EncLen_SJIS[byte] > 1) +#define SJIS_ISMB_TRAIL(byte) SJIS_CAN_BE_TRAIL_TABLE[(byte)] + +static int +sjis_mbc_enc_len(const UChar* p) +{ + return EncLen_SJIS[*p]; +} + +extern int +sjis_code_to_mbclen(OnigCodePoint code) +{ + if (code < 256) { + 
if (EncLen_SJIS[(int )code] == 1) + return 1; + else + return 0; + } + else if (code <= 0xffff) { + return 2; + } + else + return 0; +} + +static OnigCodePoint +sjis_mbc_to_code(const UChar* p, const UChar* end) +{ + int c, i, len; + OnigCodePoint n; + + len = enc_len(ONIG_ENCODING_SJIS, p); + c = *p++; + n = c; + if (len == 1) return n; + + for (i = 1; i < len; i++) { + if (p >= end) break; + c = *p++; + n <<= 8; n += c; + } + return n; +} + +static int +sjis_code_to_mbc(OnigCodePoint code, UChar *buf) +{ + UChar *p = buf; + + if ((code & 0xff00) != 0) *p++ = (UChar )(((code >> 8) & 0xff)); + *p++ = (UChar )(code & 0xff); + +#if 0 + if (enc_len(ONIG_ENCODING_SJIS, buf) != (p - buf)) + return REGERR_INVALID_WIDE_CHAR_VALUE; +#endif + return p - buf; +} + +static int +sjis_mbc_to_normalize(OnigAmbigType flag, + const UChar** pp, const UChar* end, UChar* lower) +{ + const UChar* p = *pp; + + if (ONIGENC_IS_MBC_ASCII(p)) { + if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) { + *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); + } + else { + *lower = *p; + } + + (*pp)++; + return 1; + } + else { + int len = enc_len(ONIG_ENCODING_SJIS, p); + + if (lower != p) { + int i; + for (i = 0; i < len; i++) { + *lower++ = *p++; + } + } + (*pp) += len; + return len; /* return byte length of converted char to lower */ + } +} + +static int +sjis_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) +{ + return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_SJIS, flag, pp, end); + +} + +static int +sjis_is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if ((ctype & ONIGENC_CTYPE_WORD) != 0) { + if (code < 128) + return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); + else { + return (sjis_code_to_mbclen(code) > 1 ? 
TRUE : FALSE); + } + + ctype &= ~ONIGENC_CTYPE_WORD; + if (ctype == 0) return FALSE; + } + + if (code < 128) + return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); + else + return FALSE; +} + +static UChar* +sjis_left_adjust_char_head(const UChar* start, const UChar* s) +{ + const UChar *p; + int len; + + if (s <= start) return (UChar* )s; + p = s; + + if (SJIS_ISMB_TRAIL(*p)) { + while (p > start) { + if (! SJIS_ISMB_FIRST(*--p)) { + p++; + break; + } + } + } + len = enc_len(ONIG_ENCODING_SJIS, p); + if (p + len > s) return (UChar* )p; + p += len; + return (UChar* )(p + ((s - p) & ~1)); +} + +static int +sjis_is_allowed_reverse_match(const UChar* s, const UChar* end) +{ + const UChar c = *s; + return (SJIS_ISMB_TRAIL(c) ? FALSE : TRUE); +} + +OnigEncodingType OnigEncodingSJIS = { + sjis_mbc_enc_len, + "Shift_JIS", /* name */ + 2, /* max byte length */ + 1, /* min byte length */ + ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE, + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + }, + onigenc_is_mbc_newline_0x0a, + sjis_mbc_to_code, + sjis_code_to_mbclen, + sjis_code_to_mbc, + sjis_mbc_to_normalize, + sjis_is_mbc_ambiguous, + onigenc_ascii_get_all_pair_ambig_codes, + onigenc_nothing_get_all_comp_ambig_codes, + sjis_is_code_ctype, + onigenc_not_support_get_ctype_code_range, + sjis_left_adjust_char_head, + sjis_is_allowed_reverse_match +}; +/********************************************************************** + + sprintf.c - + + $Author: matz $ + $Date: 2005/03/04 06:47:41 $ + created at: Fri Oct 15 10:39:26 JST 1993 + + Copyright (C) 1993-2003 Yukihiro Matsumoto + Copyright (C) 2000 Network Applied Communication Laboratory, Inc. 
+ Copyright (C) 2000 Information-technology Promotion Agency, Japan + +**********************************************************************/ + +#include "ruby.h" +#include <ctype.h> +#include <math.h> + +#define BIT_DIGITS(N) (((N)*146)/485 + 1) /* log2(10) =~ 146/485 */ + +static void fmt_setup _((char*,int,int,int,int)); + +static char* +remove_sign_bits(str, base) + char *str; + int base; +{ + char *s, *t; + + s = t = str; + + if (base == 16) { + while (*t == 'f') { + t++; + } + } + else if (base == 8) { + if (*t == '3') t++; + while (*t == '7') { + t++; + } + } + else if (base == 2) { + while (*t == '1') { + t++; + } + } + if (t > s) { + while (*t) *s++ = *t++; + *s = '\0'; + } + + return str; +} + +static char +sign_bits(base, p) + int base; + const char *p; +{ + char c = '.'; + + switch (base) { + case 16: + if (*p == 'X') c = 'F'; + else c = 'f'; + break; + case 8: + c = '7'; break; + case 2: + c = '1'; break; + } + return c; +} + +#define FNONE 0 +#define FSHARP 1 +#define FMINUS 2 +#define FPLUS 4 +#define FZERO 8 +#define FSPACE 16 +#define FWIDTH 32 +#define FPREC 64 + +#define CHECK(l) do {\ + while (blen + (l) >= bsiz) {\ + bsiz*=2;\ + }\ + rb_str_resize(result, bsiz);\ + buf = RSTRING(result)->ptr;\ +} while (0) + +#define PUSH(s, l) do { \ + CHECK(l);\ + memcpy(&buf[blen], s, l);\ + blen += (l);\ +} while (0) + +#define GETARG() (nextvalue != Qundef ? nextvalue : \ + posarg < 0 ? \ + (rb_raise(rb_eArgError, "unnumbered(%d) mixed with numbered", nextarg), 0) : \ + (posarg = nextarg++, GETNTHARG(posarg))) + +#define GETPOSARG(n) (posarg > 0 ? \ + (rb_raise(rb_eArgError, "numbered(%d) after unnumbered(%d)", n, posarg), 0) : \ + ((n < 1) ? (rb_raise(rb_eArgError, "invalid index - %d$", n), 0) : \ + (posarg = -1, GETNTHARG(n)))) + +#define GETNTHARG(nth) \ + ((nth >= argc) ? 
(rb_raise(rb_eArgError, "too few arguments"), 0) : argv[nth]) + +#define GETASTER(val) do { \ + t = p++; \ + n = 0; \ + for (; p < end && ISDIGIT(*p); p++) { \ + n = 10 * n + (*p - '0'); \ + } \ + if (p >= end) { \ + rb_raise(rb_eArgError, "malformed format string - %%*[0-9]"); \ + } \ + if (*p == '$') { \ + tmp = GETPOSARG(n); \ + } \ + else { \ + tmp = GETARG(); \ + p = t; \ + } \ + val = NUM2INT(tmp); \ +} while (0) + + +/* + * call-seq: + * format(format_string [, arguments...] ) => string + * sprintf(format_string [, arguments...] ) => string + * + * Returns the string resulting from applying <i>format_string</i> to + * any additional arguments. Within the format string, any characters + * other than format sequences are copied to the result. A format + * sequence consists of a percent sign, followed by optional flags, + * width, and precision indicators, then terminated with a field type + * character. The field type controls how the corresponding + * <code>sprintf</code> argument is to be interpreted, while the flags + * modify that interpretation. The field type characters are listed + * in the table at the end of this section. The flag characters are: + * + * Flag | Applies to | Meaning + * ---------+--------------+----------------------------------------- + * space | bdeEfgGioxXu | Leave a space at the start of + * | | positive numbers. + * ---------+--------------+----------------------------------------- + * (digit)$ | all | Specifies the absolute argument number + * | | for this field. Absolute and relative + * | | argument numbers cannot be mixed in a + * | | sprintf string. + * ---------+--------------+----------------------------------------- + * # | beEfgGoxX | Use an alternative format. For the + * | | conversions `o', `x', `X', and `b', + * | | prefix the result with ``0'', ``0x'', ``0X'', + * | | and ``0b'', respectively. For `e', + * | | `E', `f', `g', and 'G', force a decimal + * | | point to be added, even if no digits follow. 
+ * | | For `g' and 'G', do not remove trailing zeros. + * ---------+--------------+----------------------------------------- + * + | bdeEfgGioxXu | Add a leading plus sign to positive numbers. + * ---------+--------------+----------------------------------------- + * - | all | Left-justify the result of this conversion. + * ---------+--------------+----------------------------------------- + * 0 (zero) | all | Pad with zeros, not spaces. + * ---------+--------------+----------------------------------------- + * * | all | Use the next argument as the field width. + * | | If negative, left-justify the result. If the + * | | asterisk is followed by a number and a dollar + * | | sign, use the indicated argument as the width. + * + * + * The field width is an optional integer, followed optionally by a + * period and a precision. The width specifies the minimum number of + * characters that will be written to the result for this field. For + * numeric fields, the precision controls the number of decimal places + * displayed. For string fields, the precision determines the maximum + * number of characters to be copied from the string. (Thus, the format + * sequence <code>%10.10s</code> will always contribute exactly ten + * characters to the result.) + * + * The field types are: + * + * Field | Conversion + * ------+-------------------------------------------------------------- + * b | Convert argument as a binary number. + * c | Argument is the numeric code for a single character. + * d | Convert argument as a decimal number. + * E | Equivalent to `e', but uses an uppercase E to indicate + * | the exponent. + * e | Convert floating point argument into exponential notation + * | with one digit before the decimal point. The precision + * | determines the number of fractional digits (defaulting to six). + * f | Convert floating point argument as [-]ddd.ddd, + * | where the precision determines the number of digits after + * | the decimal point. 
+ * G | Equivalent to `g', but use an uppercase `E' in exponent form. + * g | Convert a floating point number using exponential form + * | if the exponent is less than -4 or greater than or + * | equal to the precision, or in d.dddd form otherwise. + * i | Identical to `d'. + * o | Convert argument as an octal number. + * p | The valuing of argument.inspect. + * s | Argument is a string to be substituted. If the format + * | sequence contains a precision, at most that many characters + * | will be copied. + * u | Treat argument as an unsigned decimal number. + * X | Convert argument as a hexadecimal number using uppercase + * | letters. Negative numbers will be displayed with two + * | leading periods (representing an infinite string of + * | leading 'FF's. + * x | Convert argument as a hexadecimal number. + * | Negative numbers will be displayed with two + * | leading periods (representing an infinite string of + * | leading 'ff's. + * + * Examples: + * + * sprintf("%d %04x", 123, 123) #=> "123 007b" + * sprintf("%08b '%4s'", 123, 123) #=> "01111011 ' 123'" + * sprintf("%1$*2$s %2$d %1$s", "hello", 8) #=> " hello 8 hello" + * sprintf("%1$*2$s %2$d", "hello", -8) #=> "hello -8" + * sprintf("%+g:% g:%-g", 1.23, 1.23, 1.23) #=> "+1.23: 1.23:1.23" + */ + +VALUE +rb_f_sprintf(argc, argv) + int argc; + VALUE *argv; +{ + VALUE fmt; + const char *p, *end; + char *buf; + int blen, bsiz; + VALUE result; + + int width, prec, flags = FNONE; + int nextarg = 1; + int posarg = 0; + int tainted = 0; + VALUE nextvalue; + VALUE tmp; + VALUE str; + + fmt = GETNTHARG(0); + if (OBJ_TAINTED(fmt)) tainted = 1; + StringValue(fmt); + fmt = rb_str_new4(fmt); + p = RSTRING(fmt)->ptr; + end = p + RSTRING(fmt)->len; + blen = 0; + bsiz = 120; + result = rb_str_buf_new(bsiz); + buf = RSTRING(result)->ptr; + + for (; p < end; p++) { + const char *t; + int n; + + for (t = p; t < end && *t != '%'; t++) ; + PUSH(p, t - p); + if (t >= end) { + /* end of fmt string */ + goto sprint_exit; + } + p = t 
+ 1; /* skip `%' */ + + width = prec = -1; + nextvalue = Qundef; + retry: + switch (*p) { + default: + if (ISPRINT(*p)) + rb_raise(rb_eArgError, "malformed format string - %%%c", *p); + else + rb_raise(rb_eArgError, "malformed format string"); + break; + + case ' ': + flags |= FSPACE; + p++; + goto retry; + + case '#': + flags |= FSHARP; + p++; + goto retry; + + case '+': + flags |= FPLUS; + p++; + goto retry; + + case '-': + flags |= FMINUS; + p++; + goto retry; + + case '0': + flags |= FZERO; + p++; + goto retry; + + case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + n = 0; + for (; p < end && ISDIGIT(*p); p++) { + n = 10 * n + (*p - '0'); + } + if (p >= end) { + rb_raise(rb_eArgError, "malformed format string - %%[0-9]"); + } + if (*p == '$') { + if (nextvalue != Qundef) { + rb_raise(rb_eArgError, "value given twice - %d$", n); + } + nextvalue = GETPOSARG(n); + p++; + goto retry; + } + width = n; + flags |= FWIDTH; + goto retry; + + case '*': + if (flags & FWIDTH) { + rb_raise(rb_eArgError, "width given twice"); + } + + flags |= FWIDTH; + GETASTER(width); + if (width < 0) { + flags |= FMINUS; + width = -width; + } + p++; + goto retry; + + case '.': + if (flags & FPREC) { + rb_raise(rb_eArgError, "precision given twice"); + } + flags |= FPREC; + + prec = 0; + p++; + if (*p == '*') { + GETASTER(prec); + if (prec < 0) { /* ignore negative precision */ + flags &= ~FPREC; + } + p++; + goto retry; + } + + for (; p < end && ISDIGIT(*p); p++) { + prec = 10 * prec + (*p - '0'); + } + if (p >= end) { + rb_raise(rb_eArgError, "malformed format string - %%.[0-9]"); + } + goto retry; + + case '\n': + p--; + case '\0': + case '%': + if (flags != FNONE) { + rb_raise(rb_eArgError, "illegal format character - %%"); + } + PUSH("%", 1); + break; + + case 'c': + { + VALUE val = GETARG(); + char c; + + if (!(flags & FMINUS)) + while (--width > 0) + PUSH(" ", 1); + c = NUM2INT(val) & 0xff; + PUSH(&c, 1); + while (--width > 0) + PUSH(" ", 1); 
+ } + break; + + case 's': + case 'p': + { + VALUE arg = GETARG(); + long len; + + if (*p == 'p') arg = rb_inspect(arg); + str = rb_obj_as_string(arg); + if (OBJ_TAINTED(str)) tainted = 1; + len = RSTRING(str)->len; + if (flags&FPREC) { + if (prec < len) { + len = prec; + } + } + if (flags&FWIDTH) { + if (width > len) { + CHECK(width); + width -= len; + if (!(flags&FMINUS)) { + while (width--) { + buf[blen++] = ' '; + } + } + memcpy(&buf[blen], RSTRING(str)->ptr, len); + blen += len; + if (flags&FMINUS) { + while (width--) { + buf[blen++] = ' '; + } + } + break; + } + } + PUSH(RSTRING(str)->ptr, len); + } + break; + + case 'd': + case 'i': + case 'o': + case 'x': + case 'X': + case 'b': + case 'B': + case 'u': + { + volatile VALUE val = GETARG(); + char fbuf[32], nbuf[64], *s, *t; + char *prefix = 0; + int sign = 0; + char sc = 0; + long v = 0; + int base, bignum = 0; + int len, pos; + + switch (*p) { + case 'd': + case 'i': + sign = 1; break; + case 'o': + case 'x': + case 'X': + case 'b': + case 'B': + case 'u': + default: + if (flags&(FPLUS|FSPACE)) sign = 1; + break; + } + if (flags & FSHARP) { + switch (*p) { + case 'o': + prefix = "0"; break; + case 'x': + prefix = "0x"; break; + case 'X': + prefix = "0X"; break; + case 'b': + prefix = "0b"; break; + case 'B': + prefix = "0B"; break; + } + if (prefix) { + width -= strlen(prefix); + } + } + + bin_retry: + switch (TYPE(val)) { + case T_FLOAT: + val = rb_dbl2big(RFLOAT(val)->value); + if (FIXNUM_P(val)) goto bin_retry; + bignum = 1; + break; + case T_STRING: + val = rb_str_to_inum(val, 0, Qtrue); + goto bin_retry; + case T_BIGNUM: + bignum = 1; + break; + case T_FIXNUM: + v = FIX2LONG(val); + break; + default: + val = rb_Integer(val); + goto bin_retry; + } + + switch (*p) { + case 'o': + base = 8; break; + case 'x': + case 'X': + base = 16; break; + case 'b': + case 'B': + base = 2; break; + case 'u': + case 'd': + case 'i': + default: + base = 10; break; + } + + if (!bignum) { + if (base == 2) { + val = 
rb_int2big(v); + goto bin_retry; + } + if (sign) { + char c = *p; + if (c == 'i') c = 'd'; /* %d and %i are identical */ + if (v < 0) { + v = -v; + sc = '-'; + width--; + } + else if (flags & FPLUS) { + sc = '+'; + width--; + } + else if (flags & FSPACE) { + sc = ' '; + width--; + } + sprintf(fbuf, "%%l%c", c); + sprintf(nbuf, fbuf, v); + } + else { + s = nbuf; + if (v < 0) { + if (base == 10) { + rb_warning("negative number for %%u specifier"); + } + else if (!(flags&(FPREC|FZERO))) { + strcpy(s, ".."); + s += 2; + } + } + sprintf(fbuf, "%%l%c", *p == 'X' ? 'x' : *p); + sprintf(s, fbuf, v); + if (v < 0) { + char d = 0; + + remove_sign_bits(s, base); + switch (base) { + case 16: + d = 'f'; break; + case 8: + d = '7'; break; + } + if (d && *s != d) { + memmove(s+1, s, strlen(s)+1); + *s = d; + } + } + } + s = nbuf; + } + else { + if (sign) { + tmp = rb_big2str(val, base); + s = RSTRING(tmp)->ptr; + if (s[0] == '-') { + s++; + sc = '-'; + width--; + } + else if (flags & FPLUS) { + sc = '+'; + width--; + } + else if (flags & FSPACE) { + sc = ' '; + width--; + } + } + else { + if (!RBIGNUM(val)->sign) { + val = rb_big_clone(val); + rb_big_2comp(val); + } + tmp = rb_big2str(val, base); + s = RSTRING(tmp)->ptr; + if (*s == '-') { + if (base == 10) { + rb_warning("negative number for %%u specifier"); + } + else { + remove_sign_bits(++s, base); + tmp = rb_str_new(0, 3+strlen(s)); + t = RSTRING(tmp)->ptr; + if (!(flags&(FPREC|FZERO))) { + strcpy(t, ".."); + t += 2; + } + switch (base) { + case 16: + if (s[0] != 'f') strcpy(t++, "f"); break; + case 8: + if (s[0] != '7') strcpy(t++, "7"); break; + case 2: + if (s[0] != '1') strcpy(t++, "1"); break; + } + strcpy(t, s); + s = RSTRING(tmp)->ptr; + } + } + } + } + + pos = -1; + len = strlen(s); + + if (*p == 'X') { + char *pp = s; + while (*pp) { + *pp = toupper(*pp); + pp++; + } + } + if ((flags&(FZERO|FPREC)) == FZERO) { + prec = width; + width = 0; + } + else { + if (prec < len) prec = len; + width -= prec; + } + if 
(!(flags&FMINUS)) { + CHECK(width); + while (width-- > 0) { + buf[blen++] = ' '; + } + } + if (sc) PUSH(&sc, 1); + if (prefix) { + int plen = strlen(prefix); + PUSH(prefix, plen); + } + CHECK(prec - len); + if (!bignum && v < 0) { + char c = sign_bits(base, p); + while (len < prec--) { + buf[blen++] = c; + } + } + else { + char c; + + if (!sign && bignum && !RBIGNUM(val)->sign) + c = sign_bits(base, p); + else + c = '0'; + while (len < prec--) { + buf[blen++] = c; + } + } + PUSH(s, len); + CHECK(width); + while (width-- > 0) { + buf[blen++] = ' '; + } + } + break; + + case 'f': + case 'g': + case 'G': + case 'e': + case 'E': + { + VALUE val = GETARG(); + double fval; + int i, need = 6; + char fbuf[32]; + + fval = RFLOAT(rb_Float(val))->value; + if (isnan(fval) || isinf(fval)) { + char *expr; + + if (isnan(fval)) { + expr = "NaN"; + } + else { + expr = "Inf"; + } + need = strlen(expr); + if ((!isnan(fval) && fval < 0.0) || (flags & FPLUS)) + need++; + if ((flags & FWIDTH) && need < width) + need = width; + + CHECK(need); + sprintf(&buf[blen], "%*s", need, ""); + if (flags & FMINUS) { + if (!isnan(fval) && fval < 0.0) + buf[blen++] = '-'; + else if (flags & FPLUS) + buf[blen++] = '+'; + else if (flags & FSPACE) + blen++; + strncpy(&buf[blen], expr, strlen(expr)); + } + else if (flags & FZERO) { + if (!isnan(fval) && fval < 0.0) { + buf[blen++] = '-'; + need--; + } + else if (flags & FPLUS) { + buf[blen++] = '+'; + need--; + } + else if (flags & FSPACE) { + blen++; + need--; + } + while (need-- - strlen(expr) > 0) { + buf[blen++] = '0'; + } + strncpy(&buf[blen], expr, strlen(expr)); + } + else { + if (!isnan(fval) && fval < 0.0) + buf[blen + need - strlen(expr) - 1] = '-'; + else if (flags & FPLUS) + buf[blen + need - strlen(expr) - 1] = '+'; + strncpy(&buf[blen + need - strlen(expr)], expr, + strlen(expr)); + } + blen += strlen(&buf[blen]); + break; + } + + fmt_setup(fbuf, *p, flags, width, prec); + need = 0; + if (*p != 'e' && *p != 'E') { + i = INT_MIN; + 
frexp(fval, &i); + if (i > 0) + need = BIT_DIGITS(i); + } + need += (flags&FPREC) ? prec : 6; + if ((flags&FWIDTH) && need < width) + need = width; + need += 20; + + CHECK(need); + sprintf(&buf[blen], fbuf, fval); + blen += strlen(&buf[blen]); + } + break; + } + flags = FNONE; + } + + sprint_exit: + /* XXX - We cannot validiate the number of arguments if (digit)$ style used. + */ + if (RTEST(ruby_verbose) && posarg >= 0 && nextarg < argc) { + rb_raise(rb_eArgError, "too many arguments for format string"); + } + rb_str_resize(result, blen); + + if (tainted) OBJ_TAINT(result); + return result; +} + +static void +fmt_setup(buf, c, flags, width, prec) + char *buf; + int c; + int flags, width, prec; +{ + *buf++ = '%'; + if (flags & FSHARP) *buf++ = '#'; + if (flags & FPLUS) *buf++ = '+'; + if (flags & FMINUS) *buf++ = '-'; + if (flags & FZERO) *buf++ = '0'; + if (flags & FSPACE) *buf++ = ' '; + + if (flags & FWIDTH) { + sprintf(buf, "%d", width); + buf += strlen(buf); + } + + if (flags & FPREC) { + sprintf(buf, ".%d", prec); + buf += strlen(buf); + } + + *buf++ = c; + *buf = '\0'; +} +/* This is a public domain general purpose hash table package written by Peter Moore @ UCB. 
*/ + +/* static char sccsid[] = "@(#) st.c 5.1 89/12/14 Crucible"; */ + +#include "config.h" +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#ifdef _WIN32 +#include <malloc.h> +#endif + +#ifdef NOT_RUBY +#include "regint.h" +#else +#ifdef RUBY_PLATFORM +#define xmalloc ruby_xmalloc +#define xcalloc ruby_xcalloc +#define xrealloc ruby_xrealloc +#define xfree ruby_xfree + +void *xmalloc(long); +void *xcalloc(long, long); +void *xrealloc(void *, long); +void xfree(void *); +#endif +#endif + +#include "st.h" + +typedef struct st_table_entry st_table_entry; + +struct st_table_entry { + unsigned int hash; + st_data_t key; + st_data_t record; + st_table_entry *next; +}; + +#define ST_DEFAULT_MAX_DENSITY 5 +#define ST_DEFAULT_INIT_TABLE_SIZE 11 + + /* + * DEFAULT_MAX_DENSITY is the default for the largest we allow the + * average number of items per bin before increasing the number of + * bins + * + * DEFAULT_INIT_TABLE_SIZE is the default for the number of bins + * allocated initially + * + */ + +static int numcmp(long, long); +static int numhash(long); +static struct st_hash_type type_numhash = { + numcmp, + numhash, +}; + +/* extern int strcmp(const char *, const char *); */ +static int strhash(const char *); +static struct st_hash_type type_strhash = { + strcmp, + strhash, +}; + +static void rehash(st_table *); + +#define alloc(type) (type*)xmalloc((unsigned)sizeof(type)) +#define Calloc(n,s) (char*)xcalloc((n),(s)) + +#define EQUAL(table,x,y) ((x)==(y) || (*table->type->compare)((x),(y)) == 0) + +#define do_hash(key,table) (unsigned int)(*(table)->type->hash)((key)) +#define do_hash_bin(key,table) (do_hash(key, table)%(table)->num_bins) + +/* + * MINSIZE is the minimum size of a dictionary. + */ + +#define MINSIZE 8 + +/* +Table of prime numbers 2^n+a, 2<=n<=30. 
+*/ +static long primes[] = { + 8 + 3, + 16 + 3, + 32 + 5, + 64 + 3, + 128 + 3, + 256 + 27, + 512 + 9, + 1024 + 9, + 2048 + 5, + 4096 + 3, + 8192 + 27, + 16384 + 43, + 32768 + 3, + 65536 + 45, + 131072 + 29, + 262144 + 3, + 524288 + 21, + 1048576 + 7, + 2097152 + 17, + 4194304 + 15, + 8388608 + 9, + 16777216 + 43, + 33554432 + 35, + 67108864 + 15, + 134217728 + 29, + 268435456 + 3, + 536870912 + 11, + 1073741824 + 85, + 0 +}; + +static int +new_size(size) + int size; +{ + int i; + +#if 0 + for (i=3; i<31; i++) { + if ((1<<i) > size) return 1<<i; + } + return -1; +#else + int newsize; + + for (i = 0, newsize = MINSIZE; + i < (int )(sizeof(primes)/sizeof(primes[0])); + i++, newsize <<= 1) + { + if (newsize > size) return primes[i]; + } + /* Ran out of polynomials */ + return -1; /* should raise exception */ +#endif +} + +#ifdef HASH_LOG +static int collision = 0; +static int init_st = 0; + +static void +stat_col() +{ + FILE *f = fopen("/tmp/col", "w"); + fprintf(f, "collision: %d\n", collision); + fclose(f); +} +#endif + +st_table* +st_init_table_with_size(type, size) + struct st_hash_type *type; + int size; +{ + st_table *tbl; + +#ifdef HASH_LOG + if (init_st == 0) { + init_st = 1; + atexit(stat_col); + } +#endif + + size = new_size(size); /* round up to prime number */ + + tbl = alloc(st_table); + tbl->type = type; + tbl->num_entries = 0; + tbl->num_bins = size; + tbl->bins = (st_table_entry **)Calloc(size, sizeof(st_table_entry*)); + + return tbl; +} + +st_table* +st_init_table(type) + struct st_hash_type *type; +{ + return st_init_table_with_size(type, 0); +} + +st_table* +st_init_numtable(void) +{ + return st_init_table(&type_numhash); +} + +st_table* +st_init_numtable_with_size(size) + int size; +{ + return st_init_table_with_size(&type_numhash, size); +} + +st_table* +st_init_strtable(void) +{ + return st_init_table(&type_strhash); +} + +st_table* +st_init_strtable_with_size(size) + int size; +{ + return st_init_table_with_size(&type_strhash, size); +} + +void 
+st_free_table(table) + st_table *table; +{ + register st_table_entry *ptr, *next; + int i; + + for(i = 0; i < table->num_bins; i++) { + ptr = table->bins[i]; + while (ptr != 0) { + next = ptr->next; + free(ptr); + ptr = next; + } + } + free(table->bins); + free(table); +} + +#define PTR_NOT_EQUAL(table, ptr, hash_val, key) \ +((ptr) != 0 && (ptr->hash != (hash_val) || !EQUAL((table), (key), (ptr)->key))) + +#ifdef HASH_LOG +#define COLLISION collision++ +#else +#define COLLISION +#endif + +#define FIND_ENTRY(table, ptr, hash_val, bin_pos) do {\ + bin_pos = hash_val%(table)->num_bins;\ + ptr = (table)->bins[bin_pos];\ + if (PTR_NOT_EQUAL(table, ptr, hash_val, key)) {\ + COLLISION;\ + while (PTR_NOT_EQUAL(table, ptr->next, hash_val, key)) {\ + ptr = ptr->next;\ + }\ + ptr = ptr->next;\ + }\ +} while (0) + +int +st_lookup(table, key, value) + st_table *table; + register st_data_t key; + st_data_t *value; +{ + unsigned int hash_val, bin_pos; + register st_table_entry *ptr; + + hash_val = do_hash(key, table); + FIND_ENTRY(table, ptr, hash_val, bin_pos); + + if (ptr == 0) { + return 0; + } + else { + if (value != 0) *value = ptr->record; + return 1; + } +} + +#define ADD_DIRECT(table, key, value, hash_val, bin_pos)\ +do {\ + st_table_entry *entry;\ + if (table->num_entries/(table->num_bins) > ST_DEFAULT_MAX_DENSITY) {\ + rehash(table);\ + bin_pos = hash_val % table->num_bins;\ + }\ + \ + entry = alloc(st_table_entry);\ + \ + entry->hash = hash_val;\ + entry->key = key;\ + entry->record = value;\ + entry->next = table->bins[bin_pos];\ + table->bins[bin_pos] = entry;\ + table->num_entries++;\ +} while (0) + +int +st_insert(table, key, value) + register st_table *table; + register st_data_t key; + st_data_t value; +{ + unsigned int hash_val, bin_pos; + register st_table_entry *ptr; + + hash_val = do_hash(key, table); + FIND_ENTRY(table, ptr, hash_val, bin_pos); + + if (ptr == 0) { + ADD_DIRECT(table, key, value, hash_val, bin_pos); + return 0; + } + else { + ptr->record = 
value; + return 1; + } +} + +void +st_add_direct(table, key, value) + st_table *table; + st_data_t key; + st_data_t value; +{ + unsigned int hash_val, bin_pos; + + hash_val = do_hash(key, table); + bin_pos = hash_val % table->num_bins; + ADD_DIRECT(table, key, value, hash_val, bin_pos); +} + +static void +rehash(table) + register st_table *table; +{ + register st_table_entry *ptr, *next, **new_bins; + int i, old_num_bins = table->num_bins, new_num_bins; + unsigned int hash_val; + + new_num_bins = new_size(old_num_bins+1); + new_bins = (st_table_entry**)Calloc(new_num_bins, sizeof(st_table_entry*)); + + for(i = 0; i < old_num_bins; i++) { + ptr = table->bins[i]; + while (ptr != 0) { + next = ptr->next; + hash_val = ptr->hash % new_num_bins; + ptr->next = new_bins[hash_val]; + new_bins[hash_val] = ptr; + ptr = next; + } + } + free(table->bins); + table->num_bins = new_num_bins; + table->bins = new_bins; +} + +st_table* +st_copy(old_table) + st_table *old_table; +{ + st_table *new_table; + st_table_entry *ptr, *entry; + int i, num_bins = old_table->num_bins; + + new_table = alloc(st_table); + if (new_table == 0) { + return 0; + } + + *new_table = *old_table; + new_table->bins = (st_table_entry**) + Calloc((unsigned)num_bins, sizeof(st_table_entry*)); + + if (new_table->bins == 0) { + free(new_table); + return 0; + } + + for(i = 0; i < num_bins; i++) { + new_table->bins[i] = 0; + ptr = old_table->bins[i]; + while (ptr != 0) { + entry = alloc(st_table_entry); + if (entry == 0) { + free(new_table->bins); + free(new_table); + return 0; + } + *entry = *ptr; + entry->next = new_table->bins[i]; + new_table->bins[i] = entry; + ptr = ptr->next; + } + } + return new_table; +} + +int +st_delete(table, key, value) + register st_table *table; + register st_data_t *key; + st_data_t *value; +{ + unsigned int hash_val; + st_table_entry *tmp; + register st_table_entry *ptr; + + hash_val = do_hash_bin(*key, table); + ptr = table->bins[hash_val]; + + if (ptr == 0) { + if (value != 0) 
*value = 0; + return 0; + } + + if (EQUAL(table, *key, ptr->key)) { + table->bins[hash_val] = ptr->next; + table->num_entries--; + if (value != 0) *value = ptr->record; + *key = ptr->key; + free(ptr); + return 1; + } + + for(; ptr->next != 0; ptr = ptr->next) { + if (EQUAL(table, ptr->next->key, *key)) { + tmp = ptr->next; + ptr->next = ptr->next->next; + table->num_entries--; + if (value != 0) *value = tmp->record; + *key = tmp->key; + free(tmp); + return 1; + } + } + + return 0; +} + +int +st_delete_safe(table, key, value, never) + register st_table *table; + register st_data_t *key; + st_data_t *value; + st_data_t never; +{ + unsigned int hash_val; + register st_table_entry *ptr; + + hash_val = do_hash_bin(*key, table); + ptr = table->bins[hash_val]; + + if (ptr == 0) { + if (value != 0) *value = 0; + return 0; + } + + for(; ptr != 0; ptr = ptr->next) { + if ((ptr->key != never) && EQUAL(table, ptr->key, *key)) { + table->num_entries--; + *key = ptr->key; + if (value != 0) *value = ptr->record; + ptr->key = ptr->record = never; + return 1; + } + } + + return 0; +} + +static int +delete_never(key, value, never) + st_data_t key, value, never; +{ + if (value == never) return ST_DELETE; + return ST_CONTINUE; +} + +void +st_cleanup_safe(table, never) + st_table *table; + st_data_t never; +{ + int num_entries = table->num_entries; + + st_foreach(table, delete_never, never); + table->num_entries = num_entries; +} + +int +st_foreach(table, func, arg) + st_table *table; + int (*func)(); + st_data_t arg; +{ + st_table_entry *ptr, *last, *tmp; + enum st_retval retval; + int i; + + for(i = 0; i < table->num_bins; i++) { + last = 0; + for(ptr = table->bins[i]; ptr != 0;) { + retval = (*func)(ptr->key, ptr->record, arg); + switch (retval) { + case ST_CHECK: /* check if hash is modified during iteration */ + tmp = 0; + if (i < table->num_bins) { + for (tmp = table->bins[i]; tmp; tmp=tmp->next) { + if (tmp == ptr) break; + } + } + if (!tmp) { + /* call func with error notice */ 
+ return 1; + } + /* fall through */ + case ST_CONTINUE: + last = ptr; + ptr = ptr->next; + break; + case ST_STOP: + return 0; + case ST_DELETE: + tmp = ptr; + if (last == 0) { + table->bins[i] = ptr->next; + } + else { + last->next = ptr->next; + } + ptr = ptr->next; + free(tmp); + table->num_entries--; + } + } + } + return 0; +} + +static int +strhash(string) + register const char *string; +{ + register int c; + +#ifdef HASH_ELFHASH + register unsigned int h = 0, g; + + while ((c = *string++) != '\0') { + h = ( h << 4 ) + c; + if ( g = h & 0xF0000000 ) + h ^= g >> 24; + h &= ~g; + } + return h; +#elif HASH_PERL + register int val = 0; + + while ((c = *string++) != '\0') { + val += c; + val += (val << 10); + val ^= (val >> 6); + } + val += (val << 3); + val ^= (val >> 11); + + return val + (val << 15); +#else + register int val = 0; + + while ((c = *string++) != '\0') { + val = val*997 + c; + } + + return val + (val>>5); +#endif +} + +static int +numcmp(x, y) + long x, y; +{ + return x != y; +} + +static int +numhash(n) + long n; +{ + return n; +} +/********************************************************************** + + string.c - + + $Author: matz $ + $Date: 2005/03/04 06:47:41 $ + created at: Mon Aug 9 17:12:58 JST 1993 + + Copyright (C) 1993-2003 Yukihiro Matsumoto + Copyright (C) 2000 Network Applied Communication Laboratory, Inc. 
+ Copyright (C) 2000 Information-technology Promotion Agency, Japan + +**********************************************************************/ + +#include "ruby.h" +#include "re.h" + +#define BEG(no) regs->beg[no] +#define END(no) regs->end[no] + +#include <math.h> +#include <ctype.h> + +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif + +VALUE rb_cString; + +#define STR_TMPLOCK FL_USER1 +#define STR_ASSOC FL_USER3 +#define STR_NOCAPA (ELTS_SHARED|STR_ASSOC) + +#define RESIZE_CAPA(str,capacity) do {\ + REALLOC_N(RSTRING(str)->ptr, char, (capacity)+1);\ + if (!FL_TEST(str, STR_NOCAPA))\ + RSTRING(str)->aux.capa = (capacity);\ +} while (0) + +VALUE rb_fs; + +static inline void +str_mod_check(s, p, len) + VALUE s; + char *p; + long len; +{ + if (RSTRING(s)->ptr != p || RSTRING(s)->len != len){ + rb_raise(rb_eRuntimeError, "string modified"); + } +} + +static inline void +str_frozen_check(s) + VALUE s; +{ + if (OBJ_FROZEN(s)) { + rb_raise(rb_eRuntimeError, "string frozen"); + } +} + +static VALUE str_alloc _((VALUE)); +static VALUE +str_alloc(klass) + VALUE klass; +{ + NEWOBJ(str, struct RString); + OBJSETUP(str, klass, T_STRING); + + str->ptr = 0; + str->len = 0; + str->aux.capa = 0; + + return (VALUE)str; +} + +static VALUE +str_new(klass, ptr, len) + VALUE klass; + const char *ptr; + long len; +{ + VALUE str; + + if (len < 0) { + rb_raise(rb_eArgError, "negative string size (or size too big)"); + } + + str = str_alloc(klass); + RSTRING(str)->len = len; + RSTRING(str)->aux.capa = len; + RSTRING(str)->ptr = ALLOC_N(char,len+1); + if (ptr) { + memcpy(RSTRING(str)->ptr, ptr, len); + } + RSTRING(str)->ptr[len] = '\0'; + return str; +} + +VALUE +rb_str_new(ptr, len) + const char *ptr; + long len; +{ + return str_new(rb_cString, ptr, len); +} + +VALUE +rb_str_new2(ptr) + const char *ptr; +{ + if (!ptr) { + rb_raise(rb_eArgError, "NULL pointer given"); + } + return rb_str_new(ptr, strlen(ptr)); +} + +VALUE +rb_tainted_str_new(ptr, len) + const char *ptr; + long len; +{ + 
VALUE str = rb_str_new(ptr, len); + + OBJ_TAINT(str); + return str; +} + +VALUE +rb_tainted_str_new2(ptr) + const char *ptr; +{ + VALUE str = rb_str_new2(ptr); + + OBJ_TAINT(str); + return str; +} + +static VALUE +str_new3(klass, str) + VALUE klass, str; +{ + VALUE str2 = str_alloc(klass); + + RSTRING(str2)->len = RSTRING(str)->len; + RSTRING(str2)->ptr = RSTRING(str)->ptr; + RSTRING(str2)->aux.shared = str; + FL_SET(str2, ELTS_SHARED); + OBJ_INFECT(str2, str); + + return str2; +} + +VALUE +rb_str_new3(str) + VALUE str; +{ + return str_new3(rb_obj_class(str), str); +} + +static VALUE +str_new4(klass, str) + VALUE klass, str; +{ + VALUE str2 = str_alloc(klass); + + RSTRING(str2)->len = RSTRING(str)->len; + RSTRING(str2)->ptr = RSTRING(str)->ptr; + if (FL_TEST(str, ELTS_SHARED)) { + FL_SET(str2, ELTS_SHARED); + RSTRING(str2)->aux.shared = RSTRING(str)->aux.shared; + } + else { + FL_SET(str, ELTS_SHARED); + RSTRING(str)->aux.shared = str2; + } + + return str2; +} + +VALUE +rb_str_new4(orig) + VALUE orig; +{ + VALUE klass, str; + + if (OBJ_FROZEN(orig)) return orig; + klass = rb_obj_class(orig); + if (FL_TEST(orig, ELTS_SHARED) && (str = RSTRING(orig)->aux.shared) && klass == RBASIC(str)->klass) { + long ofs; + ofs = RSTRING(str)->len - RSTRING(orig)->len; + if (ofs > 0) { + str = str_new3(klass, str); + RSTRING(str)->ptr += ofs; + RSTRING(str)->len -= ofs; + } + } + else if (FL_TEST(orig, STR_ASSOC)) { + str = str_new(klass, RSTRING(orig)->ptr, RSTRING(orig)->len); + } + else { + str = str_new4(klass, orig); + } + OBJ_INFECT(str, orig); + OBJ_FREEZE(str); + return str; +} + +VALUE +rb_str_new5(obj, ptr, len) + VALUE obj; + const char *ptr; + long len; +{ + return str_new(rb_obj_class(obj), ptr, len); +} + +#define STR_BUF_MIN_SIZE 128 + +VALUE +rb_str_buf_new(capa) + long capa; +{ + VALUE str = str_alloc(rb_cString); + + if (capa < STR_BUF_MIN_SIZE) { + capa = STR_BUF_MIN_SIZE; + } + RSTRING(str)->ptr = 0; + RSTRING(str)->len = 0; + RSTRING(str)->aux.capa = capa; + 
RSTRING(str)->ptr = ALLOC_N(char, capa+1); + RSTRING(str)->ptr[0] = '\0'; + + return str; +} + +VALUE +rb_str_buf_new2(ptr) + const char *ptr; +{ + VALUE str; + long len = strlen(ptr); + + str = rb_str_buf_new(len); + rb_str_buf_cat(str, ptr, len); + + return str; +} + +VALUE +rb_str_to_str(str) + VALUE str; +{ + return rb_convert_type(str, T_STRING, "String", "to_str"); +} + +static void +rb_str_shared_replace(str, str2) + VALUE str, str2; +{ + if (str == str2) return; + rb_str_modify(str); + if (!FL_TEST(str, ELTS_SHARED)) free(RSTRING(str)->ptr); + RSTRING(str)->ptr = RSTRING(str2)->ptr; + RSTRING(str)->len = RSTRING(str2)->len; + FL_UNSET(str, STR_NOCAPA); + if (FL_TEST(str2, STR_NOCAPA)) { + FL_SET(str, RBASIC(str2)->flags & STR_NOCAPA); + RSTRING(str)->aux.shared = RSTRING(str2)->aux.shared; + } + else { + RSTRING(str)->aux.capa = RSTRING(str2)->aux.capa; + } + RSTRING(str2)->ptr = 0; /* abandon str2 */ + RSTRING(str2)->len = 0; + RSTRING(str2)->aux.capa = 0; + FL_UNSET(str2, STR_NOCAPA); + if (OBJ_TAINTED(str2)) OBJ_TAINT(str); +} + +static ID id_to_s; + +VALUE +rb_obj_as_string(obj) + VALUE obj; +{ + VALUE str; + + if (TYPE(obj) == T_STRING) { + return obj; + } + str = rb_funcall(obj, id_to_s, 0); + if (TYPE(str) != T_STRING) + return rb_any_to_s(obj); + if (OBJ_TAINTED(obj)) OBJ_TAINT(str); + return str; +} + +static VALUE rb_str_replace _((VALUE, VALUE)); + +VALUE +rb_str_dup(str) + VALUE str; +{ + VALUE dup = str_alloc(rb_obj_class(str)); + rb_str_replace(dup, str); + return dup; +} + + +/* + * call-seq: + * String.new(str="") => new_str + * + * Returns a new string object containing a copy of <i>str</i>. + */ + +static VALUE +rb_str_init(argc, argv, str) + int argc; + VALUE *argv; + VALUE str; +{ + VALUE orig; + + if (rb_scan_args(argc, argv, "01", &orig) == 1) + rb_str_replace(str, orig); + return str; +} + +/* + * call-seq: + * str.length => integer + * + * Returns the length of <i>str</i>. 
+ */ + +static VALUE +rb_str_length(str) + VALUE str; +{ + return LONG2NUM(RSTRING(str)->len); +} + +/* + * call-seq: + * str.empty? => true or false + * + * Returns <code>true</code> if <i>str</i> has a length of zero. + * + * "hello".empty? #=> false + * "".empty? #=> true + */ + +static VALUE +rb_str_empty(str) + VALUE str; +{ + if (RSTRING(str)->len == 0) + return Qtrue; + return Qfalse; +} + +/* + * call-seq: + * str + other_str => new_str + * + * Concatenation---Returns a new <code>String</code> containing + * <i>other_str</i> concatenated to <i>str</i>. + * + * "Hello from " + self.to_s #=> "Hello from main" + */ + +VALUE +rb_str_plus(str1, str2) + VALUE str1, str2; +{ + VALUE str3; + + StringValue(str2); + str3 = rb_str_new(0, RSTRING(str1)->len+RSTRING(str2)->len); + memcpy(RSTRING(str3)->ptr, RSTRING(str1)->ptr, RSTRING(str1)->len); + memcpy(RSTRING(str3)->ptr + RSTRING(str1)->len, + RSTRING(str2)->ptr, RSTRING(str2)->len); + RSTRING(str3)->ptr[RSTRING(str3)->len] = '\0'; + + if (OBJ_TAINTED(str1) || OBJ_TAINTED(str2)) + OBJ_TAINT(str3); + return str3; +} + +/* + * call-seq: + * str * integer => new_str + * + * Copy---Returns a new <code>String</code> containing <i>integer</i> copies of + * the receiver. + * + * "Ho! " * 3 #=> "Ho! Ho! Ho! 
" + */ + +VALUE +rb_str_times(str, times) + VALUE str; + VALUE times; +{ + VALUE str2; + long i, len; + + len = NUM2LONG(times); + if (len == 0) return rb_str_new5(str,0,0); + if (len < 0) { + rb_raise(rb_eArgError, "negative argument"); + } + if (LONG_MAX/len < RSTRING(str)->len) { + rb_raise(rb_eArgError, "argument too big"); + } + + str2 = rb_str_new5(str,0, RSTRING(str)->len*len); + for (i=0; i<len; i++) { + memcpy(RSTRING(str2)->ptr+(i*RSTRING(str)->len), + RSTRING(str)->ptr, RSTRING(str)->len); + } + RSTRING(str2)->ptr[RSTRING(str2)->len] = '\0'; + + OBJ_INFECT(str2, str); + + return str2; +} + +/* + * call-seq: + * str % arg => new_str + * + * Format---Uses <i>str</i> as a format specification, and returns the result + * of applying it to <i>arg</i>. If the format specification contains more than + * one substitution, then <i>arg</i> must be an <code>Array</code> containing + * the values to be substituted. See <code>Kernel::sprintf</code> for details + * of the format string. + * + * "%05d" % 123 #=> "00123" + * "%-5s: %08x" % [ "ID", self.id ] #=> "ID : 200e14d6" + */ + +static VALUE +rb_str_format(str, arg) + VALUE str, arg; +{ + VALUE *argv; + + if (TYPE(arg) == T_ARRAY) { + argv = ALLOCA_N(VALUE, RARRAY(arg)->len + 1); + argv[0] = str; + MEMCPY(argv+1, RARRAY(arg)->ptr, VALUE, RARRAY(arg)->len); + return rb_f_sprintf(RARRAY(arg)->len+1, argv); + } + + argv = ALLOCA_N(VALUE, 2); + argv[0] = str; + argv[1] = arg; + return rb_f_sprintf(2, argv); +} + +static int +str_independent(str) + VALUE str; +{ + if (FL_TEST(str, STR_TMPLOCK)) { + rb_raise(rb_eRuntimeError, "can't modify string; temporarily locked"); + } + if (OBJ_FROZEN(str)) rb_error_frozen("string"); + if (!OBJ_TAINTED(str) && rb_safe_level() >= 4) + rb_raise(rb_eSecurityError, "Insecure: can't modify string"); + if (!FL_TEST(str, ELTS_SHARED)) return 1; + return 0; +} + +static void +str_make_independent(str) + VALUE str; +{ + char *ptr; + + ptr = ALLOC_N(char, RSTRING(str)->len+1); + if 
(RSTRING(str)->ptr) { + memcpy(ptr, RSTRING(str)->ptr, RSTRING(str)->len); + } + ptr[RSTRING(str)->len] = 0; + RSTRING(str)->ptr = ptr; + RSTRING(str)->aux.capa = RSTRING(str)->len; + FL_UNSET(str, STR_NOCAPA); +} + +void +rb_str_modify(str) + VALUE str; +{ + if (!str_independent(str)) + str_make_independent(str); +} + +void +rb_str_associate(str, add) + VALUE str, add; +{ + if (FL_TEST(str, STR_ASSOC)) { + /* already associated */ + rb_ary_concat(RSTRING(str)->aux.shared, add); + } + else { + if (FL_TEST(str, ELTS_SHARED)) { + str_make_independent(str); + } + else if (RSTRING(str)->aux.capa != RSTRING(str)->len) { + RESIZE_CAPA(str, RSTRING(str)->len); + } + RSTRING(str)->aux.shared = add; + FL_SET(str, STR_ASSOC); + } +} + +VALUE +rb_str_associated(str) + VALUE str; +{ + if (FL_TEST(str, STR_ASSOC)) { + return RSTRING(str)->aux.shared; + } + return Qfalse; +} + +static char *null_str = ""; + +VALUE +rb_string_value(ptr) + volatile VALUE *ptr; +{ + VALUE s = *ptr; + if (TYPE(s) != T_STRING) { + s = rb_str_to_str(s); + *ptr = s; + } + if (!RSTRING(s)->ptr) { + FL_SET(s, ELTS_SHARED); + RSTRING(s)->ptr = null_str; + } + return s; +} + +char * +rb_string_value_ptr(ptr) + volatile VALUE *ptr; +{ + return RSTRING(rb_string_value(ptr))->ptr; +} + +char * +rb_string_value_cstr(ptr) + volatile VALUE *ptr; +{ + VALUE str = rb_string_value(ptr); + char *s = RSTRING(str)->ptr; + + if (!s || RSTRING(str)->len != strlen(s)) { + rb_raise(rb_eArgError, "string contains null byte"); + } + return s; +} + +VALUE +rb_check_string_type(str) + VALUE str; +{ + str = rb_check_convert_type(str, T_STRING, "String", "to_str"); + if (!NIL_P(str) && !RSTRING(str)->ptr) { + FL_SET(str, ELTS_SHARED); + RSTRING(str)->ptr = null_str; + } + return str; +} + +VALUE +rb_str_substr(str, beg, len) + VALUE str; + long beg, len; +{ + VALUE str2; + + if (len < 0) return Qnil; + if (beg > RSTRING(str)->len) return Qnil; + if (beg < 0) { + beg += RSTRING(str)->len; + if (beg < 0) return Qnil; + } + if 
(beg + len > RSTRING(str)->len) { + len = RSTRING(str)->len - beg; + } + if (len < 0) { + len = 0; + } + if (len == 0) return rb_str_new5(str,0,0); + + if (len > sizeof(struct RString)/2 && + beg + len == RSTRING(str)->len && !FL_TEST(str, STR_ASSOC)) { + str2 = rb_str_new3(rb_str_new4(str)); + RSTRING(str2)->ptr += RSTRING(str2)->len - len; + RSTRING(str2)->len = len; + } + else { + str2 = rb_str_new5(str, RSTRING(str)->ptr+beg, len); + } + OBJ_INFECT(str2, str); + + return str2; +} + +VALUE +rb_str_freeze(str) + VALUE str; +{ + return rb_obj_freeze(str); +} + +VALUE +rb_str_dup_frozen(str) + VALUE str; +{ + if (FL_TEST(str, ELTS_SHARED) && RSTRING(str)->aux.shared) { + VALUE shared = RSTRING(str)->aux.shared; + if (RSTRING(shared)->len == RSTRING(str)->len) { + OBJ_FREEZE(shared); + return shared; + } + } + if (OBJ_FROZEN(str)) return str; + str = rb_str_dup(str); + OBJ_FREEZE(str); + return str; +} + +VALUE +rb_str_locktmp(str) + VALUE str; +{ + if (FL_TEST(str, STR_TMPLOCK)) { + rb_raise(rb_eRuntimeError, "temporal locking already locked string"); + } + FL_SET(str, STR_TMPLOCK); + return str; +} + +VALUE +rb_str_unlocktmp(str) + VALUE str; +{ + if (!FL_TEST(str, STR_TMPLOCK)) { + rb_raise(rb_eRuntimeError, "temporal unlocking already unlocked string"); + } + FL_UNSET(str, STR_TMPLOCK); + return str; +} + +VALUE +rb_str_resize(str, len) + VALUE str; + long len; +{ + if (len < 0) { + rb_raise(rb_eArgError, "negative string size (or size too big)"); + } + + rb_str_modify(str); + if (len != RSTRING(str)->len) { + if (RSTRING(str)->len < len || RSTRING(str)->len - len > 1024) { + REALLOC_N(RSTRING(str)->ptr, char, len+1); + if (!FL_TEST(str, STR_NOCAPA)) { + RSTRING(str)->aux.capa = len; + } + } + RSTRING(str)->len = len; + RSTRING(str)->ptr[len] = '\0'; /* sentinel */ + } + return str; +} + +VALUE +rb_str_buf_cat(str, ptr, len) + VALUE str; + const char *ptr; + long len; +{ + long capa, total; + + if (len == 0) return str; + if (len < 0) { + rb_raise(rb_eArgError, 
"negative string size (or size too big)"); + } + rb_str_modify(str); + if (FL_TEST(str, STR_ASSOC)) { + FL_UNSET(str, STR_ASSOC); + capa = RSTRING(str)->aux.capa = RSTRING(str)->len; + } + else { + capa = RSTRING(str)->aux.capa; + } + total = RSTRING(str)->len+len; + if (capa <= total) { + while (total > capa) { + capa = (capa + 1) * 2; + } + RESIZE_CAPA(str, capa); + } + memcpy(RSTRING(str)->ptr + RSTRING(str)->len, ptr, len); + RSTRING(str)->len = total; + RSTRING(str)->ptr[total] = '\0'; /* sentinel */ + + return str; +} + +VALUE +rb_str_buf_cat2(str, ptr) + VALUE str; + const char *ptr; +{ + return rb_str_buf_cat(str, ptr, strlen(ptr)); +} + +VALUE +rb_str_cat(str, ptr, len) + VALUE str; + const char *ptr; + long len; +{ + if (len < 0) { + rb_raise(rb_eArgError, "negative string size (or size too big)"); + } + if (FL_TEST(str, STR_ASSOC)) { + rb_str_modify(str); + REALLOC_N(RSTRING(str)->ptr, char, RSTRING(str)->len+len); + memcpy(RSTRING(str)->ptr + RSTRING(str)->len, ptr, len); + RSTRING(str)->len += len; + RSTRING(str)->ptr[RSTRING(str)->len] = '\0'; /* sentinel */ + return str; + } + + return rb_str_buf_cat(str, ptr, len); +} + +VALUE +rb_str_cat2(str, ptr) + VALUE str; + const char *ptr; +{ + return rb_str_cat(str, ptr, strlen(ptr)); +} + +VALUE +rb_str_buf_append(str, str2) + VALUE str, str2; +{ + long capa, len; + + rb_str_modify(str); + if (FL_TEST(str, STR_ASSOC)) { + FL_UNSET(str, STR_ASSOC); + capa = RSTRING(str)->aux.capa = RSTRING(str)->len; + } + else { + capa = RSTRING(str)->aux.capa; + } + len = RSTRING(str)->len+RSTRING(str2)->len; + if (capa <= len) { + while (len > capa) { + capa = (capa + 1) * 2; + } + RESIZE_CAPA(str, capa); + } + memcpy(RSTRING(str)->ptr + RSTRING(str)->len, + RSTRING(str2)->ptr, RSTRING(str2)->len); + RSTRING(str)->len += RSTRING(str2)->len; + RSTRING(str)->ptr[RSTRING(str)->len] = '\0'; /* sentinel */ + OBJ_INFECT(str, str2); + + return str; +} + +VALUE +rb_str_append(str, str2) + VALUE str, str2; +{ + StringValue(str2); 
+ rb_str_modify(str); + if (RSTRING(str2)->len > 0) { + if (FL_TEST(str, STR_ASSOC)) { + long len = RSTRING(str)->len+RSTRING(str2)->len; + REALLOC_N(RSTRING(str)->ptr, char, len+1); + memcpy(RSTRING(str)->ptr + RSTRING(str)->len, + RSTRING(str2)->ptr, RSTRING(str2)->len); + RSTRING(str)->ptr[len] = '\0'; /* sentinel */ + RSTRING(str)->len = len; + } + else { + return rb_str_buf_append(str, str2); + } + } + OBJ_INFECT(str, str2); + return str; +} + + +/* + * call-seq: + * str << fixnum => str + * str.concat(fixnum) => str + * str << obj => str + * str.concat(obj) => str + * + * Append---Concatenates the given object to <i>str</i>. If the object is a + * <code>Fixnum</code> between 0 and 255, it is converted to a character before + * concatenation. + * + * a = "hello " + * a << "world" #=> "hello world" + * a.concat(33) #=> "hello world!" + */ + +VALUE +rb_str_concat(str1, str2) + VALUE str1, str2; +{ + if (FIXNUM_P(str2)) { + int i = FIX2INT(str2); + if (0 <= i && i <= 0xff) { /* byte */ + char c = i; + return rb_str_cat(str1, &c, 1); + } + } + str1 = rb_str_append(str1, str2); + + return str1; +} + +int +rb_str_hash(str) + VALUE str; +{ + register long len = RSTRING(str)->len; + register char *p = RSTRING(str)->ptr; + register int key = 0; + +#ifdef HASH_ELFHASH + register unsigned int g; + + while (len--) { + key = (key << 4) + *p++; + if (g = key & 0xF0000000) + key ^= g >> 24; + key &= ~g; + } +#elif HASH_PERL + while (len--) { + key += *p++; + key += (key << 10); + key ^= (key >> 6); + } + key += (key << 3); + key ^= (key >> 11); + key += (key << 15); +#else + while (len--) { + key = key*65599 + *p; + p++; + } + key = key + (key>>5); +#endif + return key; +} + +/* + * call-seq: + * str.hash => fixnum + * + * Return a hash based on the string's length and content. 
+ */ + +static VALUE +rb_str_hash_m(str) + VALUE str; +{ + int key = rb_str_hash(str); + return INT2FIX(key); +} + +#define lesser(a,b) (((a)>(b))?(b):(a)) + +int +rb_str_cmp(str1, str2) + VALUE str1, str2; +{ + long len; + int retval; + + len = lesser(RSTRING(str1)->len, RSTRING(str2)->len); + retval = rb_memcmp(RSTRING(str1)->ptr, RSTRING(str2)->ptr, len); + if (retval == 0) { + if (RSTRING(str1)->len == RSTRING(str2)->len) return 0; + if (RSTRING(str1)->len > RSTRING(str2)->len) return 1; + return -1; + } + if (retval > 0) return 1; + return -1; +} + + +/* + * call-seq: + * str == obj => true or false + * + * Equality---If <i>obj</i> is not a <code>String</code>, returns + * <code>false</code>. Otherwise, returns <code>true</code> if <i>str</i> + * <code><=></code> <i>obj</i> returns zero. + */ + +static VALUE +rb_str_equal(str1, str2) + VALUE str1, str2; +{ + if (str1 == str2) return Qtrue; + if (TYPE(str2) != T_STRING) { + if (!rb_respond_to(str2, rb_intern("to_str"))) { + return Qfalse; + } + return rb_equal(str2, str1); + } + if (RSTRING(str1)->len == RSTRING(str2)->len && + rb_str_cmp(str1, str2) == 0) { + return Qtrue; + } + return Qfalse; +} + +/* + * call-seq: + * str.eql?(other) => true or false + * + * Two strings are equal if the have the same length and content. + */ + +static VALUE +rb_str_eql(str1, str2) + VALUE str1, str2; +{ + if (TYPE(str2) != T_STRING || RSTRING(str1)->len != RSTRING(str2)->len) + return Qfalse; + + if (memcmp(RSTRING(str1)->ptr, RSTRING(str2)->ptr, + lesser(RSTRING(str1)->len, RSTRING(str2)->len)) == 0) + return Qtrue; + + return Qfalse; +} + +/* + * call-seq: + * str <=> other_str => -1, 0, +1 + * + * Comparison---Returns -1 if <i>other_str</i> is less than, 0 if + * <i>other_str</i> is equal to, and +1 if <i>other_str</i> is greater than + * <i>str</i>. 
If the strings are of different lengths, and the strings are + * equal when compared up to the shortest length, then the longer string is + * considered greater than the shorter one. If the variable <code>$=</code> is + * <code>false</code>, the comparison is based on comparing the binary values + * of each character in the string. In older versions of Ruby, setting + * <code>$=</code> allowed case-insensitive comparisons; this is now deprecated + * in favor of using <code>String#casecmp</code>. + * + * <code><=></code> is the basis for the methods <code><</code>, + * <code><=</code>, <code>></code>, <code>>=</code>, and <code>between?</code>, + * included from module <code>Comparable</code>. The method + * <code>String#==</code> does not use <code>Comparable#==</code>. + * + * "abcdef" <=> "abcde" #=> 1 + * "abcdef" <=> "abcdef" #=> 0 + * "abcdef" <=> "abcdefg" #=> -1 + * "abcdef" <=> "ABCDEF" #=> 1 + */ + +static VALUE +rb_str_cmp_m(str1, str2) + VALUE str1, str2; +{ + long result; + + if (TYPE(str2) != T_STRING) { + if (!rb_respond_to(str2, rb_intern("to_str"))) { + return Qnil; + } + else if (!rb_respond_to(str2, rb_intern("<=>"))) { + return Qnil; + } + else { + VALUE tmp = rb_funcall(str2, rb_intern("<=>"), 1, str1); + + if (NIL_P(tmp)) return Qnil; + if (!FIXNUM_P(tmp)) { + return rb_funcall(LONG2FIX(0), '-', 1, tmp); + } + result = -FIX2LONG(tmp); + } + } + else { + result = rb_str_cmp(str1, str2); + } + return LONG2NUM(result); +} + +/* + * call-seq: + * str.casecmp(other_str) => -1, 0, +1 + * + * Case-insensitive version of <code>String#<=></code>. 
+ * + * "abcdef".casecmp("abcde") #=> 1 + * "aBcDeF".casecmp("abcdef") #=> 0 + * "abcdef".casecmp("abcdefg") #=> -1 + * "abcdef".casecmp("ABCDEF") #=> 0 + */ + +static VALUE +rb_str_casecmp(str1, str2) + VALUE str1, str2; +{ + long len; + int retval; + + StringValue(str2); + len = lesser(RSTRING(str1)->len, RSTRING(str2)->len); + retval = rb_memcicmp(RSTRING(str1)->ptr, RSTRING(str2)->ptr, len); + if (retval == 0) { + if (RSTRING(str1)->len == RSTRING(str2)->len) return INT2FIX(0); + if (RSTRING(str1)->len > RSTRING(str2)->len) return INT2FIX(1); + return INT2FIX(-1); + } + if (retval == 0) return INT2FIX(0); + if (retval > 0) return INT2FIX(1); + return INT2FIX(-1); +} + +static long +rb_str_index(str, sub, offset) + VALUE str, sub; + long offset; +{ + long pos; + + if (offset < 0) { + offset += RSTRING(str)->len; + if (offset < 0) return -1; + } + if (RSTRING(str)->len - offset < RSTRING(sub)->len) return -1; + if (RSTRING(sub)->len == 0) return offset; + pos = rb_memsearch(RSTRING(sub)->ptr, RSTRING(sub)->len, + RSTRING(str)->ptr+offset, RSTRING(str)->len-offset); + if (pos < 0) return pos; + return pos + offset; +} + + +/* + * call-seq: + * str.index(substring [, offset]) => fixnum or nil + * str.index(fixnum [, offset]) => fixnum or nil + * str.index(regexp [, offset]) => fixnum or nil + * + * Returns the index of the first occurrence of the given <i>substring</i>, + * character (<i>fixnum</i>), or pattern (<i>regexp</i>) in <i>str</i>. Returns + * <code>nil</code> if not found. If the second parameter is present, it + * specifies the position in the string to begin the search. 
+ * + * "hello".index('e') #=> 1 + * "hello".index('lo') #=> 3 + * "hello".index('a') #=> nil + * "hello".index(101) #=> 1 + * "hello".index(/[aeiou]/, -3) #=> 4 + */ + +static VALUE +rb_str_index_m(argc, argv, str) + int argc; + VALUE *argv; + VALUE str; +{ + VALUE sub; + VALUE initpos; + long pos; + + if (rb_scan_args(argc, argv, "11", &sub, &initpos) == 2) { + pos = NUM2LONG(initpos); + } + else { + pos = 0; + } + if (pos < 0) { + pos += RSTRING(str)->len; + if (pos < 0) { + if (TYPE(sub) == T_REGEXP) { + rb_backref_set(Qnil); + } + return Qnil; + } + } + + switch (TYPE(sub)) { + case T_REGEXP: + pos = rb_reg_adjust_startpos(sub, str, pos, 0); + pos = rb_reg_search(sub, str, pos, 0); + break; + + case T_FIXNUM: + { + int c = FIX2INT(sub); + long len = RSTRING(str)->len; + char *p = RSTRING(str)->ptr; + + for (;pos<len;pos++) { + if ((unsigned char)p[pos] == c) return LONG2NUM(pos); + } + return Qnil; + } + + default: { + VALUE tmp; + + tmp = rb_check_string_type(sub); + if (NIL_P(tmp)) { + rb_raise(rb_eTypeError, "type mismatch: %s given", + rb_obj_classname(sub)); + } + sub = tmp; + } + /* fall through */ + case T_STRING: + pos = rb_str_index(str, sub, pos); + break; + } + + if (pos == -1) return Qnil; + return LONG2NUM(pos); +} + +static long +rb_str_rindex(str, sub, pos) + VALUE str, sub; + long pos; +{ + long len = RSTRING(sub)->len; + char *s, *sbeg, *t; + + /* substring longer than string */ + if (RSTRING(str)->len < len) return -1; + if (RSTRING(str)->len - pos < len) { + pos = RSTRING(str)->len - len; + } + sbeg = RSTRING(str)->ptr; + s = RSTRING(str)->ptr + pos; + t = RSTRING(sub)->ptr; + if (len) { + while (sbeg <= s) { + if (rb_memcmp(s, t, len) == 0) { + return s - RSTRING(str)->ptr; + } + s--; + } + return -1; + } + else { + return pos; + } +} + + +/* + * call-seq: + * str.rindex(substring [, fixnum]) => fixnum or nil + * str.rindex(fixnum [, fixnum]) => fixnum or nil + * str.rindex(regexp [, fixnum]) => fixnum or nil + * + * Returns the index of 
the last occurrence of the given <i>substring</i>, + * character (<i>fixnum</i>), or pattern (<i>regexp</i>) in <i>str</i>. Returns + * <code>nil</code> if not found. If the second parameter is present, it + * specifies the position in the string to end the search---characters beyond + * this point will not be considered. + * + * "hello".rindex('e') #=> 1 + * "hello".rindex('l') #=> 3 + * "hello".rindex('a') #=> nil + * "hello".rindex(101) #=> 1 + * "hello".rindex(/[aeiou]/, -2) #=> 1 + */ + +static VALUE +rb_str_rindex_m(argc, argv, str) + int argc; + VALUE *argv; + VALUE str; +{ + VALUE sub; + VALUE position; + long pos; + + if (rb_scan_args(argc, argv, "11", &sub, &position) == 2) { + pos = NUM2LONG(position); + if (pos < 0) { + pos += RSTRING(str)->len; + if (pos < 0) { + if (TYPE(sub) == T_REGEXP) { + rb_backref_set(Qnil); + } + return Qnil; + } + } + if (pos > RSTRING(str)->len) pos = RSTRING(str)->len; + } + else { + pos = RSTRING(str)->len; + } + + switch (TYPE(sub)) { + case T_REGEXP: + if (RREGEXP(sub)->len) { + pos = rb_reg_adjust_startpos(sub, str, pos, 1); + pos = rb_reg_search(sub, str, pos, 1); + } + if (pos >= 0) return LONG2NUM(pos); + break; + + case T_STRING: + pos = rb_str_rindex(str, sub, pos); + if (pos >= 0) return LONG2NUM(pos); + break; + + case T_FIXNUM: + { + int c = FIX2INT(sub); + char *p = RSTRING(str)->ptr + pos; + char *pbeg = RSTRING(str)->ptr; + + if (pos == RSTRING(str)->len) { + if (pos == 0) return Qnil; + --p; + } + while (pbeg <= p) { + if ((unsigned char)*p == c) + return LONG2NUM((char*)p - RSTRING(str)->ptr); + p--; + } + return Qnil; + } + + default: + rb_raise(rb_eTypeError, "type mismatch: %s given", + rb_obj_classname(sub)); + } + return Qnil; +} + +/* + * call-seq: + * str =~ obj => fixnum or nil + * + * Match---If <i>obj</i> is a <code>Regexp</code>, use it as a pattern to match + * against <i>str</i>. If <i>obj</i> is a <code>String</code>, look for it in + * <i>str</i> (similar to <code>String#index</code>). 
Returns the position the + * match starts, or <code>nil</code> if there is no match. Otherwise, invokes + * <i>obj.=~</i>, passing <i>str</i> as an argument. The default + * <code>=~</code> in <code>Object</code> returns <code>false</code>. + * + * "cat o' 9 tails" =~ '\d' #=> nil + * "cat o' 9 tails" =~ /\d/ #=> 7 + * "cat o' 9 tails" =~ 9 #=> false + */ + +static VALUE +rb_str_match(x, y) + VALUE x, y; +{ + switch (TYPE(y)) { + case T_STRING: + rb_raise(rb_eTypeError, "type mismatch: String given"); + + case T_REGEXP: + return rb_reg_match(y, x); + + default: + return rb_funcall(y, rb_intern("=~"), 1, x); + } +} + + +static VALUE get_pat _((VALUE, int)); + + +/* + * call-seq: + * str.match(pattern) => matchdata or nil + * + * Converts <i>pattern</i> to a <code>Regexp</code> (if it isn't already one), + * then invokes its <code>match</code> method on <i>str</i>. If the second + * parameter is present, it specifies the position in the string to begin the + * search. + * + * 'hello'.match('(.)\1') #=> #<MatchData:0x401b3d30> + * 'hello'.match('(.)\1')[0] #=> "ll" + * 'hello'.match(/(.)\1/)[0] #=> "ll" + * 'hello'.match('xx') #=> nil + */ + +static VALUE +rb_str_match_m(argc, argv, str) + int argc; + VALUE *argv; + VALUE str; +{ + VALUE re; + if (argc < 1) + rb_raise(rb_eArgError, "wrong number of arguments (%d for 1)", argc); + re = argv[0]; + argv[0] = str; + return rb_funcall2(get_pat(re, 0), rb_intern("match"), argc, argv); +} + +static char +succ_char(s) + char *s; +{ + char c = *s; + + /* numerics */ + if ('0' <= c && c < '9') (*s)++; + else if (c == '9') { + *s = '0'; + return '1'; + } + /* small alphabets */ + else if ('a' <= c && c < 'z') (*s)++; + else if (c == 'z') { + return *s = 'a'; + } + /* capital alphabets */ + else if ('A' <= c && c < 'Z') (*s)++; + else if (c == 'Z') { + return *s = 'A'; + } + return 0; +} + + +/* + * call-seq: + * str.succ => new_str + * str.next => new_str + * + * Returns the successor to <i>str</i>. 
The successor is calculated by + * incrementing characters starting from the rightmost alphanumeric (or + * the rightmost character if there are no alphanumerics) in the + * string. Incrementing a digit always results in another digit, and + * incrementing a letter results in another letter of the same case. + * Incrementing nonalphanumerics uses the underlying character set's + * collating sequence. + * + * If the increment generates a ``carry,'' the character to the left of + * it is incremented. This process repeats until there is no carry, + * adding an additional character if necessary. + * + * "abcd".succ #=> "abce" + * "THX1138".succ #=> "THX1139" + * "<<koala>>".succ #=> "<<koalb>>" + * "1999zzz".succ #=> "2000aaa" + * "ZZZ9999".succ #=> "AAAA0000" + * "***".succ #=> "**+" + */ + +static VALUE +rb_str_succ(orig) + VALUE orig; +{ + VALUE str; + char *sbeg, *s; + int c = -1; + long n = 0; + + str = rb_str_new5(orig, RSTRING(orig)->ptr, RSTRING(orig)->len); + OBJ_INFECT(str, orig); + if (RSTRING(str)->len == 0) return str; + + sbeg = RSTRING(str)->ptr; s = sbeg + RSTRING(str)->len - 1; + + while (sbeg <= s) { + if (ISALNUM(*s)) { + if ((c = succ_char(s)) == 0) break; + n = s - sbeg; + } + s--; + } + if (c == -1) { /* str contains no alnum */ + sbeg = RSTRING(str)->ptr; s = sbeg + RSTRING(str)->len - 1; + c = '\001'; + while (sbeg <= s) { + if ((*s += 1) != 0) break; + s--; + } + } + if (s < sbeg) { + RESIZE_CAPA(str, RSTRING(str)->len + 1); + s = RSTRING(str)->ptr + n; + memmove(s+1, s, RSTRING(str)->len - n); + *s = c; + RSTRING(str)->len += 1; + RSTRING(str)->ptr[RSTRING(str)->len] = '\0'; + } + + return str; +} + + +/* + * call-seq: + * str.succ! => str + * str.next! => str + * + * Equivalent to <code>String#succ</code>, but modifies the receiver in + * place. 
+ */ + +static VALUE +rb_str_succ_bang(str) + VALUE str; +{ + rb_str_shared_replace(str, rb_str_succ(str)); + + return str; +} + +VALUE +rb_str_upto(beg, end, excl) + VALUE beg, end; + int excl; +{ + VALUE current, after_end; + ID succ = rb_intern("succ"); + int n; + + StringValue(end); + n = rb_str_cmp(beg, end); + if (n > 0 || (excl && n == 0)) return beg; + after_end = rb_funcall(end, succ, 0, 0); + current = beg; + while (!rb_str_equal(current, after_end)) { + rb_yield(current); + if (!excl && rb_str_equal(current, end)) break; + current = rb_funcall(current, succ, 0, 0); + StringValue(current); + if (excl && rb_str_equal(current, end)) break; + StringValue(current); + if (RSTRING(current)->len > RSTRING(end)->len) + break; + } + + return beg; +} + + +/* + * call-seq: + * str.upto(other_str) {|s| block } => str + * + * Iterates through successive values, starting at <i>str</i> and + * ending at <i>other_str</i> inclusive, passing each value in turn to + * the block. The <code>String#succ</code> method is used to generate + * each value. 
+ * + * "a8".upto("b6") {|s| print s, ' ' } + * for s in "a8".."b6" + * print s, ' ' + * end + * + * <em>produces:</em> + * + * a8 a9 b0 b1 b2 b3 b4 b5 b6 + * a8 a9 b0 b1 b2 b3 b4 b5 b6 + */ + +static VALUE +rb_str_upto_m(beg, end) + VALUE beg, end; +{ + return rb_str_upto(beg, end, Qfalse); +} + +static VALUE +rb_str_subpat(str, re, nth) + VALUE str, re; + int nth; +{ + if (rb_reg_search(re, str, 0, 0) >= 0) { + return rb_reg_nth_match(nth, rb_backref_get()); + } + return Qnil; +} + +static VALUE +rb_str_aref(str, indx) + VALUE str; + VALUE indx; +{ + long idx; + + switch (TYPE(indx)) { + case T_FIXNUM: + idx = FIX2LONG(indx); + + num_index: + if (idx < 0) { + idx = RSTRING(str)->len + idx; + } + if (idx < 0 || RSTRING(str)->len <= idx) { + return Qnil; + } + return INT2FIX(RSTRING(str)->ptr[idx] & 0xff); + + case T_REGEXP: + return rb_str_subpat(str, indx, 0); + + case T_STRING: + if (rb_str_index(str, indx, 0) != -1) + return rb_str_dup(indx); + return Qnil; + + default: + /* check if indx is Range */ + { + long beg, len; + switch (rb_range_beg_len(indx, &beg, &len, RSTRING(str)->len, 0)) { + case Qfalse: + break; + case Qnil: + return Qnil; + default: + return rb_str_substr(str, beg, len); + } + } + idx = NUM2LONG(indx); + goto num_index; + } + return Qnil; /* not reached */ +} + + +/* + * call-seq: + * str[fixnum] => fixnum or nil + * str[fixnum, fixnum] => new_str or nil + * str[range] => new_str or nil + * str[regexp] => new_str or nil + * str[regexp, fixnum] => new_str or nil + * str[other_str] => new_str or nil + * str.slice(fixnum) => fixnum or nil + * str.slice(fixnum, fixnum) => new_str or nil + * str.slice(range) => new_str or nil + * str.slice(regexp) => new_str or nil + * str.slice(regexp, fixnum) => new_str or nil + * str.slice(other_str) => new_str or nil + * + * Element Reference---If passed a single <code>Fixnum</code>, returns the code + * of the character at that position. 
If passed two <code>Fixnum</code> + * objects, returns a substring starting at the offset given by the first, and + * a length given by the second. If given a range, a substring containing + * characters at offsets given by the range is returned. In all three cases, if + * an offset is negative, it is counted from the end of <i>str</i>. Returns + * <code>nil</code> if the initial offset falls outside the string, the length + * is negative, or the beginning of the range is greater than the end. + * + * If a <code>Regexp</code> is supplied, the matching portion of <i>str</i> is + * returned. If a numeric parameter follows the regular expression, that + * component of the <code>MatchData</code> is returned instead. If a + * <code>String</code> is given, that string is returned if it occurs in + * <i>str</i>. In both cases, <code>nil</code> is returned if there is no + * match. + * + * a = "hello there" + * a[1] #=> 101 + * a[1,3] #=> "ell" + * a[1..3] #=> "ell" + * a[-3,2] #=> "er" + * a[-4..-2] #=> "her" + * a[12..-1] #=> nil + * a[-2..-4] #=> "" + * a[/[aeiou](.)\1/] #=> "ell" + * a[/[aeiou](.)\1/, 0] #=> "ell" + * a[/[aeiou](.)\1/, 1] #=> "l" + * a[/[aeiou](.)\1/, 2] #=> nil + * a["lo"] #=> "lo" + * a["bye"] #=> nil + */ + +static VALUE +rb_str_aref_m(argc, argv, str) + int argc; + VALUE *argv; + VALUE str; +{ + if (argc == 2) { + if (TYPE(argv[0]) == T_REGEXP) { + return rb_str_subpat(str, argv[0], NUM2INT(argv[1])); + } + return rb_str_substr(str, NUM2LONG(argv[0]), NUM2LONG(argv[1])); + } + if (argc != 1) { + rb_raise(rb_eArgError, "wrong number of arguments (%d for 1)", argc); + } + return rb_str_aref(str, argv[0]); +} + +static void +rb_str_splice(str, beg, len, val) + VALUE str; + long beg, len; + VALUE val; +{ + if (len < 0) rb_raise(rb_eIndexError, "negative length %ld", len); + + StringValue(val); + rb_str_modify(str); + + if (RSTRING(str)->len < beg) { + out_of_range: + rb_raise(rb_eIndexError, "index %ld out of string", beg); + } + if (beg < 0) { + if 
(-beg > RSTRING(str)->len) { + goto out_of_range; + } + beg += RSTRING(str)->len; + } + if (RSTRING(str)->len < beg + len) { + len = RSTRING(str)->len - beg; + } + + if (len < RSTRING(val)->len) { + /* expand string */ + RESIZE_CAPA(str, RSTRING(str)->len + RSTRING(val)->len - len + 1); + } + + if (RSTRING(val)->len != len) { + memmove(RSTRING(str)->ptr + beg + RSTRING(val)->len, + RSTRING(str)->ptr + beg + len, + RSTRING(str)->len - (beg + len)); + } + if (RSTRING(str)->len < beg && len < 0) { + MEMZERO(RSTRING(str)->ptr + RSTRING(str)->len, char, -len); + } + if (RSTRING(val)->len > 0) { + memmove(RSTRING(str)->ptr+beg, RSTRING(val)->ptr, RSTRING(val)->len); + } + RSTRING(str)->len += RSTRING(val)->len - len; + if (RSTRING(str)->ptr) { + RSTRING(str)->ptr[RSTRING(str)->len] = '\0'; + } + OBJ_INFECT(str, val); +} + +void +rb_str_update(str, beg, len, val) + VALUE str; + long beg, len; + VALUE val; +{ + rb_str_splice(str, beg, len, val); +} + +static void +rb_str_subpat_set(str, re, nth, val) + VALUE str, re; + int nth; + VALUE val; +{ + VALUE match; + long start, end, len; + + if (rb_reg_search(re, str, 0, 0) < 0) { + rb_raise(rb_eIndexError, "regexp not matched"); + } + match = rb_backref_get(); + if (nth >= RMATCH(match)->regs->num_regs) { + out_of_range: + rb_raise(rb_eIndexError, "index %d out of regexp", nth); + } + if (nth < 0) { + if (-nth >= RMATCH(match)->regs->num_regs) { + goto out_of_range; + } + nth += RMATCH(match)->regs->num_regs; + } + + start = RMATCH(match)->BEG(nth); + if (start == -1) { + rb_raise(rb_eIndexError, "regexp group %d not matched", nth); + } + end = RMATCH(match)->END(nth); + len = end - start; + rb_str_splice(str, start, len, val); +} + +static VALUE +rb_str_aset(str, indx, val) + VALUE str; + VALUE indx, val; +{ + long idx, beg; + + switch (TYPE(indx)) { + case T_FIXNUM: + num_index: + idx = FIX2LONG(indx); + if (RSTRING(str)->len <= idx) { + out_of_range: + rb_raise(rb_eIndexError, "index %ld out of string", idx); + } + if (idx < 
0) { + if (-idx > RSTRING(str)->len) + goto out_of_range; + idx += RSTRING(str)->len; + } + if (FIXNUM_P(val)) { + rb_str_modify(str); + if (RSTRING(str)->len == idx) { + RSTRING(str)->len += 1; + RESIZE_CAPA(str, RSTRING(str)->len); + } + RSTRING(str)->ptr[idx] = FIX2INT(val) & 0xff; + } + else { + rb_str_splice(str, idx, 1, val); + } + return val; + + case T_REGEXP: + rb_str_subpat_set(str, indx, 0, val); + return val; + + case T_STRING: + beg = rb_str_index(str, indx, 0); + if (beg < 0) { + rb_raise(rb_eIndexError, "string not matched"); + } + rb_str_splice(str, beg, RSTRING(indx)->len, val); + return val; + + default: + /* check if indx is Range */ + { + long beg, len; + if (rb_range_beg_len(indx, &beg, &len, RSTRING(str)->len, 2)) { + rb_str_splice(str, beg, len, val); + return val; + } + } + idx = NUM2LONG(indx); + goto num_index; + } +} + +/* + * call-seq: + * str[fixnum] = fixnum + * str[fixnum] = new_str + * str[fixnum, fixnum] = new_str + * str[range] = aString + * str[regexp] = new_str + * str[regexp, fixnum] = new_str + * str[other_str] = new_str + * + * Element Assignment---Replaces some or all of the content of <i>str</i>. The + * portion of the string affected is determined using the same criteria as + * <code>String#[]</code>. If the replacement string is not the same length as + * the text it is replacing, the string will be adjusted accordingly. If the + * regular expression or string is used as the index doesn't match a position + * in the string, <code>IndexError</code> is raised. If the regular expression + * form is used, the optional second <code>Fixnum</code> allows you to specify + * which portion of the match to replace (effectively using the + * <code>MatchData</code> indexing rules. 
The forms that take a + * <code>Fixnum</code> will raise an <code>IndexError</code> if the value is + * out of range; the <code>Range</code> form will raise a + * <code>RangeError</code>, and the <code>Regexp</code> and <code>String</code> + * forms will silently ignore the assignment. + */ + +static VALUE +rb_str_aset_m(argc, argv, str) + int argc; + VALUE *argv; + VALUE str; +{ + if (argc == 3) { + if (TYPE(argv[0]) == T_REGEXP) { + rb_str_subpat_set(str, argv[0], NUM2INT(argv[1]), argv[2]); + } + else { + rb_str_splice(str, NUM2LONG(argv[0]), NUM2LONG(argv[1]), argv[2]); + } + return argv[2]; + } + if (argc != 2) { + rb_raise(rb_eArgError, "wrong number of arguments (%d for 2)", argc); + } + return rb_str_aset(str, argv[0], argv[1]); +} + +/* + * call-seq: + * str.insert(index, other_str) => str + * + * Inserts <i>other_str</i> before the character at the given + * <i>index</i>, modifying <i>str</i>. Negative indices count from the + * end of the string, and insert <em>after</em> the given character. + * The intent is insert <i>aString</i> so that it starts at the given + * <i>index</i>. + * + * "abcd".insert(0, 'X') #=> "Xabcd" + * "abcd".insert(3, 'X') #=> "abcXd" + * "abcd".insert(4, 'X') #=> "abcdX" + * "abcd".insert(-3, 'X') #=> "abXcd" + * "abcd".insert(-1, 'X') #=> "abcdX" + */ + +static VALUE +rb_str_insert(str, idx, str2) + VALUE str, idx, str2; +{ + long pos = NUM2LONG(idx); + + if (pos == -1) { + pos = RSTRING(str)->len; + } + else if (pos < 0) { + pos++; + } + rb_str_splice(str, pos, 0, str2); + return str; +} + + +/* + * call-seq: + * str.slice!(fixnum) => fixnum or nil + * str.slice!(fixnum, fixnum) => new_str or nil + * str.slice!(range) => new_str or nil + * str.slice!(regexp) => new_str or nil + * str.slice!(other_str) => new_str or nil + * + * Deletes the specified portion from <i>str</i>, and returns the portion + * deleted. 
The forms that take a <code>Fixnum</code> will raise an + * <code>IndexError</code> if the value is out of range; the <code>Range</code> + * form will raise a <code>RangeError</code>, and the <code>Regexp</code> and + * <code>String</code> forms will silently ignore the assignment. + * + * string = "this is a string" + * string.slice!(2) #=> 105 + * string.slice!(3..6) #=> " is " + * string.slice!(/s.*t/) #=> "sa st" + * string.slice!("r") #=> "r" + * string #=> "thing" + */ + +static VALUE +rb_str_slice_bang(argc, argv, str) + int argc; + VALUE *argv; + VALUE str; +{ + VALUE result; + VALUE buf[3]; + int i; + + if (argc < 1 || 2 < argc) { + rb_raise(rb_eArgError, "wrong number of arguments (%d for 1)", argc); + } + for (i=0; i<argc; i++) { + buf[i] = argv[i]; + } + buf[i] = rb_str_new(0,0); + result = rb_str_aref_m(argc, buf, str); + if (!NIL_P(result)) { + rb_str_aset_m(argc+1, buf, str); + } + return result; +} + +static VALUE +get_pat(pat, quote) + VALUE pat; + int quote; +{ + VALUE val; + + switch (TYPE(pat)) { + case T_REGEXP: + return pat; + + case T_STRING: + break; + + default: + val = rb_check_string_type(pat); + if (NIL_P(val)) { + Check_Type(pat, T_REGEXP); + } + pat = val; + } + + if (quote) { + pat = rb_reg_quote(pat); + } + + return rb_reg_regcomp(pat); +} + + +/* + * call-seq: + * str.sub!(pattern, replacement) => str or nil + * str.sub!(pattern) {|match| block } => str or nil + * + * Performs the substitutions of <code>String#sub</code> in place, + * returning <i>str</i>, or <code>nil</code> if no substitutions were + * performed. 
+ */ + +static VALUE +rb_str_sub_bang(argc, argv, str) + int argc; + VALUE *argv; + VALUE str; +{ + VALUE pat, repl, match; + struct re_registers *regs; + int iter = 0; + int tainted = 0; + long plen; + + if (argc == 1 && rb_block_given_p()) { + iter = 1; + } + else if (argc == 2) { + repl = argv[1]; + StringValue(repl); + if (OBJ_TAINTED(repl)) tainted = 1; + } + else { + rb_raise(rb_eArgError, "wrong number of arguments (%d for 2)", argc); + } + + pat = get_pat(argv[0], 1); + if (rb_reg_search(pat, str, 0, 0) >= 0) { + rb_str_modify(str); + match = rb_backref_get(); + regs = RMATCH(match)->regs; + + if (iter) { + char *p = RSTRING(str)->ptr; long len = RSTRING(str)->len; + + rb_match_busy(match); + repl = rb_obj_as_string(rb_yield(rb_reg_nth_match(0, match))); + str_mod_check(str, p, len); + str_frozen_check(str); + rb_backref_set(match); + } + else { + repl = rb_reg_regsub(repl, str, regs); + } + if (OBJ_TAINTED(repl)) tainted = 1; + plen = END(0) - BEG(0); + if (RSTRING(repl)->len > plen) { + RESIZE_CAPA(str, RSTRING(str)->len + RSTRING(repl)->len - plen); + } + if (RSTRING(repl)->len != plen) { + memmove(RSTRING(str)->ptr + BEG(0) + RSTRING(repl)->len, + RSTRING(str)->ptr + BEG(0) + plen, + RSTRING(str)->len - BEG(0) - plen); + } + memcpy(RSTRING(str)->ptr + BEG(0), + RSTRING(repl)->ptr, RSTRING(repl)->len); + RSTRING(str)->len += RSTRING(repl)->len - plen; + RSTRING(str)->ptr[RSTRING(str)->len] = '\0'; + if (tainted) OBJ_TAINT(str); + + return str; + } + return Qnil; +} + + +/* + * call-seq: + * str.sub(pattern, replacement) => new_str + * str.sub(pattern) {|match| block } => new_str + * + * Returns a copy of <i>str</i> with the <em>first</em> occurrence of + * <i>pattern</i> replaced with either <i>replacement</i> or the value of the + * block. 
The <i>pattern</i> will typically be a <code>Regexp</code>; if it is + * a <code>String</code> then no regular expression metacharacters will be + * interpreted (that is <code>/\d/</code> will match a digit, but + * <code>'\d'</code> will match a backslash followed by a 'd'). + * + * If the method call specifies <i>replacement</i>, special variables such as + * <code>$&</code> will not be useful, as substitution into the string occurs + * before the pattern match starts. However, the sequences <code>\1</code>, + * <code>\2</code>, etc., may be used. + * + * In the block form, the current match string is passed in as a parameter, and + * variables such as <code>$1</code>, <code>$2</code>, <code>$`</code>, + * <code>$&</code>, and <code>$'</code> will be set appropriately. The value + * returned by the block will be substituted for the match on each call. + * + * The result inherits any tainting in the original string or any supplied + * replacement string. + * + * "hello".sub(/[aeiou]/, '*') #=> "h*llo" + * "hello".sub(/([aeiou])/, '<\1>') #=> "h<e>llo" + * "hello".sub(/./) {|s| s[0].to_s + ' ' } #=> "104 ello" + */ + +static VALUE +rb_str_sub(argc, argv, str) + int argc; + VALUE *argv; + VALUE str; +{ + str = rb_str_dup(str); + rb_str_sub_bang(argc, argv, str); + return str; +} + +static VALUE +str_gsub(argc, argv, str, bang) + int argc; + VALUE *argv; + VALUE str; + int bang; +{ + VALUE pat, val, repl, match, dest; + struct re_registers *regs; + long beg, n; + long offset, blen, slen, len; + int iter = 0; + char *buf, *bp, *sp, *cp; + int tainted = 0; + + if (argc == 1 && rb_block_given_p()) { + iter = 1; + } + else if (argc == 2) { + repl = argv[1]; + StringValue(repl); + if (OBJ_TAINTED(repl)) tainted = 1; + } + else { + rb_raise(rb_eArgError, "wrong number of arguments (%d for 2)", argc); + } + + pat = get_pat(argv[0], 1); + offset=0; n=0; + beg = rb_reg_search(pat, str, 0, 0); + if (beg < 0) { + if (bang) return Qnil; /* no match, no substitution */ + return 
rb_str_dup(str); + } + + blen = RSTRING(str)->len + 30; /* len + margin */ + dest = str_new(0, 0, blen); + buf = RSTRING(dest)->ptr; + bp = buf; + sp = cp = RSTRING(str)->ptr; + slen = RSTRING(str)->len; + + rb_str_locktmp(dest); + while (beg >= 0) { + n++; + match = rb_backref_get(); + regs = RMATCH(match)->regs; + if (iter) { + rb_match_busy(match); + val = rb_obj_as_string(rb_yield(rb_reg_nth_match(0, match))); + str_mod_check(str, sp, slen); + if (bang) str_frozen_check(str); + if (val == dest) { /* paranoid chack [ruby-dev:24827] */ + rb_raise(rb_eRuntimeError, "block should not cheat"); + } + rb_backref_set(match); + } + else { + val = rb_reg_regsub(repl, str, regs); + } + if (OBJ_TAINTED(val)) tainted = 1; + len = (bp - buf) + (beg - offset) + RSTRING(val)->len + 3; + if (blen < len) { + while (blen < len) blen *= 2; + len = bp - buf; + RESIZE_CAPA(dest, blen); + RSTRING(dest)->len = blen; + buf = RSTRING(dest)->ptr; + bp = buf + len; + } + len = beg - offset; /* copy pre-match substr */ + memcpy(bp, cp, len); + bp += len; + memcpy(bp, RSTRING(val)->ptr, RSTRING(val)->len); + bp += RSTRING(val)->len; + offset = END(0); + if (BEG(0) == END(0)) { + /* + * Always consume at least one character of the input string + * in order to prevent infinite loops. 
+ */ + if (RSTRING(str)->len <= END(0)) break; + len = mbclen2(RSTRING(str)->ptr[END(0)], pat); + memcpy(bp, RSTRING(str)->ptr+END(0), len); + bp += len; + offset = END(0) + len; + } + cp = RSTRING(str)->ptr + offset; + if (offset > RSTRING(str)->len) break; + beg = rb_reg_search(pat, str, offset, 0); + } + if (RSTRING(str)->len > offset) { + len = bp - buf; + if (blen - len < RSTRING(str)->len - offset) { + blen = len + RSTRING(str)->len - offset; + RESIZE_CAPA(dest, blen); + buf = RSTRING(dest)->ptr; + bp = buf + len; + } + memcpy(bp, cp, RSTRING(str)->len - offset); + bp += RSTRING(str)->len - offset; + } + rb_backref_set(match); + *bp = '\0'; + rb_str_unlocktmp(dest); + if (bang) { + if (str_independent(str)) { + free(RSTRING(str)->ptr); + } + FL_UNSET(str, STR_NOCAPA); + RSTRING(str)->ptr = buf; + RSTRING(str)->aux.capa = blen; + RSTRING(dest)->ptr = 0; + RSTRING(dest)->len = 0; + } + else { + RBASIC(dest)->klass = rb_obj_class(str); + OBJ_INFECT(dest, str); + str = dest; + } + RSTRING(str)->len = bp - buf; + + if (tainted) OBJ_TAINT(str); + return str; +} + + +/* + * call-seq: + * str.gsub!(pattern, replacement) => str or nil + * str.gsub!(pattern) {|match| block } => str or nil + * + * Performs the substitutions of <code>String#gsub</code> in place, returning + * <i>str</i>, or <code>nil</code> if no substitutions were performed. + */ + +static VALUE +rb_str_gsub_bang(argc, argv, str) + int argc; + VALUE *argv; + VALUE str; +{ + return str_gsub(argc, argv, str, 1); +} + + +/* + * call-seq: + * str.gsub(pattern, replacement) => new_str + * str.gsub(pattern) {|match| block } => new_str + * + * Returns a copy of <i>str</i> with <em>all</em> occurrences of <i>pattern</i> + * replaced with either <i>replacement</i> or the value of the block. 
The + * <i>pattern</i> will typically be a <code>Regexp</code>; if it is a + * <code>String</code> then no regular expression metacharacters will be + * interpreted (that is <code>/\d/</code> will match a digit, but + * <code>'\d'</code> will match a backslash followed by a 'd'). + * + * If a string is used as the replacement, special variables from the match + * (such as <code>$&</code> and <code>$1</code>) cannot be substituted into it, + * as substitution into the string occurs before the pattern match + * starts. However, the sequences <code>\1</code>, <code>\2</code>, and so on + * may be used to interpolate successive groups in the match. + * + * In the block form, the current match string is passed in as a parameter, and + * variables such as <code>$1</code>, <code>$2</code>, <code>$`</code>, + * <code>$&</code>, and <code>$'</code> will be set appropriately. The value + * returned by the block will be substituted for the match on each call. + * + * The result inherits any tainting in the original string or any supplied + * replacement string. + * + * "hello".gsub(/[aeiou]/, '*') #=> "h*ll*" + * "hello".gsub(/([aeiou])/, '<\1>') #=> "h<e>ll<o>" + * "hello".gsub(/./) {|s| s[0].to_s + ' '} #=> "104 101 108 108 111 " + */ + +static VALUE +rb_str_gsub(argc, argv, str) + int argc; + VALUE *argv; + VALUE str; +{ + return str_gsub(argc, argv, str, 0); +} + + +/* + * call-seq: + * str.replace(other_str) => str + * + * Replaces the contents and taintedness of <i>str</i> with the corresponding + * values in <i>other_str</i>. 
+ * + * s = "hello" #=> "hello" + * s.replace "world" #=> "world" + */ + +static VALUE +rb_str_replace(str, str2) + VALUE str, str2; +{ + if (str == str2) return str; + + StringValue(str2); + if (FL_TEST(str2, ELTS_SHARED)) { + if (str_independent(str)) { + free(RSTRING(str)->ptr); + } + RSTRING(str)->len = RSTRING(str2)->len; + RSTRING(str)->ptr = RSTRING(str2)->ptr; + FL_SET(str, ELTS_SHARED); + FL_UNSET(str, STR_ASSOC); + RSTRING(str)->aux.shared = RSTRING(str2)->aux.shared; + } + else { + rb_str_modify(str); + rb_str_resize(str, RSTRING(str2)->len); + memcpy(RSTRING(str)->ptr, RSTRING(str2)->ptr, RSTRING(str2)->len); + if (FL_TEST(str2, STR_ASSOC)) { + FL_SET(str, STR_ASSOC); + RSTRING(str)->aux.shared = RSTRING(str2)->aux.shared; + } + } + + OBJ_INFECT(str, str2); + return str; +} + +/* + * call-seq: + * string.clear -> string + * + * Makes string empty. + * + * a = "abcde" + * a.clear #=> "" + */ + +static VALUE +rb_str_clear(str) + VALUE str; +{ + /* rb_str_modify() */ /* no need for str_make_independent */ + if (str_independent(str)) { + free(RSTRING(str)->ptr); + } + RSTRING(str)->aux.shared = 0; + FL_UNSET(str, STR_NOCAPA); + FL_SET(str, ELTS_SHARED); + RSTRING(str)->ptr = null_str; + RARRAY(str)->len = 0; + return str; +} + +static VALUE +uscore_get() +{ + VALUE line; + + line = rb_lastline_get(); + if (TYPE(line) != T_STRING) { + rb_raise(rb_eTypeError, "$_ value need to be String (%s given)", + NIL_P(line) ? "nil" : rb_obj_classname(line)); + } + return line; +} + +/* + * call-seq: + * sub!(pattern, replacement) => $_ or nil + * sub!(pattern) {|...| block } => $_ or nil + * + * Equivalent to <code>$_.sub!(<i>args</i>)</code>. 
+ */ + +static VALUE +rb_f_sub_bang(argc, argv) + int argc; + VALUE *argv; +{ + return rb_str_sub_bang(argc, argv, uscore_get()); +} + +/* + * call-seq: + * sub(pattern, replacement) => $_ + * sub(pattern) { block } => $_ + * + * Equivalent to <code>$_.sub(<i>args</i>)</code>, except that + * <code>$_</code> will be updated if substitution occurs. + */ + +static VALUE +rb_f_sub(argc, argv) + int argc; + VALUE *argv; +{ + VALUE str = rb_str_dup(uscore_get()); + + if (NIL_P(rb_str_sub_bang(argc, argv, str))) + return str; + rb_lastline_set(str); + return str; +} + +/* + * call-seq: + * gsub!(pattern, replacement) => string or nil + * gsub!(pattern) {|...| block } => string or nil + * + * Equivalent to <code>Kernel::gsub</code>, except <code>nil</code> is + * returned if <code>$_</code> is not modified. + * + * $_ = "quick brown fox" + * gsub! /cat/, '*' #=> nil + * $_ #=> "quick brown fox" + */ + +static VALUE +rb_f_gsub_bang(argc, argv) + int argc; + VALUE *argv; +{ + return rb_str_gsub_bang(argc, argv, uscore_get()); +} + +/* + * call-seq: + * gsub(pattern, replacement) => string + * gsub(pattern) {|...| block } => string + * + * Equivalent to <code>$_.gsub...</code>, except that <code>$_</code> + * receives the modified result. + * + * $_ = "quick brown fox" + * gsub /[aeiou]/, '*' #=> "q**ck br*wn f*x" + * $_ #=> "q**ck br*wn f*x" + */ + +static VALUE +rb_f_gsub(argc, argv) + int argc; + VALUE *argv; +{ + VALUE str = rb_str_dup(uscore_get()); + + if (NIL_P(rb_str_gsub_bang(argc, argv, str))) + return str; + rb_lastline_set(str); + return str; +} + + +/* + * call-seq: + * str.reverse! => str + * + * Reverses <i>str</i> in place. 
+ */ + +static VALUE +rb_str_reverse_bang(str) + VALUE str; +{ + char *s, *e; + char c; + + if (RSTRING(str)->len > 1) { + rb_str_modify(str); + s = RSTRING(str)->ptr; + e = s + RSTRING(str)->len - 1; + while (s < e) { + c = *s; + *s++ = *e; + *e-- = c; + } + } + return str; +} + + +/* + * call-seq: + * str.reverse => new_str + * + * Returns a new string with the characters from <i>str</i> in reverse order. + * + * "stressed".reverse #=> "desserts" + */ + +static VALUE +rb_str_reverse(str) + VALUE str; +{ + VALUE obj; + char *s, *e, *p; + + if (RSTRING(str)->len <= 1) return rb_str_dup(str); + + obj = rb_str_new5(str, 0, RSTRING(str)->len); + s = RSTRING(str)->ptr; e = s + RSTRING(str)->len - 1; + p = RSTRING(obj)->ptr; + + while (e >= s) { + *p++ = *e--; + } + OBJ_INFECT(obj, str); + + return obj; +} + + +/* + * call-seq: + * str.include? other_str => true or false + * str.include? fixnum => true or false + * + * Returns <code>true</code> if <i>str</i> contains the given string or + * character. + * + * "hello".include? "lo" #=> true + * "hello".include? "ol" #=> false + * "hello".include? ?h #=> true + */ + +static VALUE +rb_str_include(str, arg) + VALUE str, arg; +{ + long i; + + if (FIXNUM_P(arg)) { + if (memchr(RSTRING(str)->ptr, FIX2INT(arg), RSTRING(str)->len)) + return Qtrue; + return Qfalse; + } + + StringValue(arg); + i = rb_str_index(str, arg, 0); + + if (i == -1) return Qfalse; + return Qtrue; +} + + +/* + * call-seq: + * str.to_i(base=10) => integer + * + * Returns the result of interpreting leading characters in <i>str</i> as an + * integer base <i>base</i> (2, 8, 10, or 16). Extraneous characters past the + * end of a valid number are ignored. If there is not a valid number at the + * start of <i>str</i>, <code>0</code> is returned. This method never raises an + * exception. 
+ * + * "12345".to_i #=> 12345 + * "99 red balloons".to_i #=> 99 + * "0a".to_i #=> 0 + * "0a".to_i(16) #=> 10 + * "hello".to_i #=> 0 + * "1100101".to_i(2) #=> 101 + * "1100101".to_i(8) #=> 294977 + * "1100101".to_i(10) #=> 1100101 + * "1100101".to_i(16) #=> 17826049 + */ + +static VALUE +rb_str_to_i(argc, argv, str) + int argc; + VALUE *argv; + VALUE str; +{ + VALUE b; + int base; + + rb_scan_args(argc, argv, "01", &b); + if (argc == 0) base = 10; + else base = NUM2INT(b); + + if (base < 0) { + rb_raise(rb_eArgError, "illegal radix %d", base); + } + return rb_str_to_inum(str, base, Qfalse); +} + + +/* + * call-seq: + * str.to_f => float + * + * Returns the result of interpreting leading characters in <i>str</i> as a + * floating point number. Extraneous characters past the end of a valid number + * are ignored. If there is not a valid number at the start of <i>str</i>, + * <code>0.0</code> is returned. This method never raises an exception. + * + * "123.45e1".to_f #=> 1234.5 + * "45.67 degrees".to_f #=> 45.67 + * "thx1138".to_f #=> 0.0 + */ + +static VALUE +rb_str_to_f(str) + VALUE str; +{ + return rb_float_new(rb_str_to_dbl(str, Qfalse)); +} + + +/* + * call-seq: + * str.to_s => str + * str.to_str => str + * + * Returns the receiver. + */ + +static VALUE +rb_str_to_s(str) + VALUE str; +{ + if (rb_obj_class(str) != rb_cString) { + VALUE dup = str_alloc(rb_cString); + rb_str_replace(dup, str); + return dup; + } + return str; +} + +#define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{')) + +/* + * call-seq: + * str.inspect => string + * + * Returns a printable version of _str_, with special characters + * escaped. 
+ * + * str = "hello" + * str[3] = 8 + * str.inspect #=> "hel\010o" + */ + +VALUE +rb_str_inspect(str) + VALUE str; +{ + char *p, *pend; + VALUE result = rb_str_buf_new2("\""); + char s[5]; + + p = RSTRING(str)->ptr; pend = p + RSTRING(str)->len; + while (p < pend) { + char c = *p++; + if (ismbchar(c) && p < pend) { + int len = mbclen(c); + rb_str_buf_cat(result, p - 1, len); + p += len - 1; + } + else if (c == '"'|| c == '\\' || (c == '#' && IS_EVSTR(p, pend))) { + s[0] = '\\'; s[1] = c; + rb_str_buf_cat(result, s, 2); + } + else if (ISPRINT(c)) { + s[0] = c; + rb_str_buf_cat(result, s, 1); + } + else if (c == '\n') { + s[0] = '\\'; s[1] = 'n'; + rb_str_buf_cat(result, s, 2); + } + else if (c == '\r') { + s[0] = '\\'; s[1] = 'r'; + rb_str_buf_cat(result, s, 2); + } + else if (c == '\t') { + s[0] = '\\'; s[1] = 't'; + rb_str_buf_cat(result, s, 2); + } + else if (c == '\f') { + s[0] = '\\'; s[1] = 'f'; + rb_str_buf_cat(result, s, 2); + } + else if (c == '\013') { + s[0] = '\\'; s[1] = 'v'; + rb_str_buf_cat(result, s, 2); + } + else if (c == '\007') { + s[0] = '\\'; s[1] = 'a'; + rb_str_buf_cat(result, s, 2); + } + else if (c == 033) { + s[0] = '\\'; s[1] = 'e'; + rb_str_buf_cat(result, s, 2); + } + else { + sprintf(s, "\\%03o", c & 0377); + rb_str_buf_cat2(result, s); + } + } + rb_str_buf_cat2(result, "\""); + + OBJ_INFECT(result, str); + return result; +} + + +/* + * call-seq: + * str.dump => new_str + * + * Produces a version of <i>str</i> with all nonprinting characters replaced by + * <code>\nnn</code> notation and all special characters escaped. + */ + +VALUE +rb_str_dump(str) + VALUE str; +{ + long len; + char *p, *pend; + char *q, *qend; + VALUE result; + + len = 2; /* "" */ + p = RSTRING(str)->ptr; pend = p + RSTRING(str)->len; + while (p < pend) { + char c = *p++; + switch (c) { + case '"': case '\\': + case '\n': case '\r': + case '\t': case '\f': + case '\013': case '\007': case '\033': + len += 2; + break; + + case '#': + len += IS_EVSTR(p, pend) ? 
2 : 1; + break; + + default: + if (ISPRINT(c)) { + len++; + } + else { + len += 4; /* \nnn */ + } + break; + } + } + + result = rb_str_new5(str, 0, len); + p = RSTRING(str)->ptr; pend = p + RSTRING(str)->len; + q = RSTRING(result)->ptr; qend = q + len; + + *q++ = '"'; + while (p < pend) { + char c = *p++; + + if (c == '"' || c == '\\') { + *q++ = '\\'; + *q++ = c; + } + else if (c == '#') { + if (IS_EVSTR(p, pend)) *q++ = '\\'; + *q++ = '#'; + } + else if (ISPRINT(c)) { + *q++ = c; + } + else if (c == '\n') { + *q++ = '\\'; + *q++ = 'n'; + } + else if (c == '\r') { + *q++ = '\\'; + *q++ = 'r'; + } + else if (c == '\t') { + *q++ = '\\'; + *q++ = 't'; + } + else if (c == '\f') { + *q++ = '\\'; + *q++ = 'f'; + } + else if (c == '\013') { + *q++ = '\\'; + *q++ = 'v'; + } + else if (c == '\007') { + *q++ = '\\'; + *q++ = 'a'; + } + else if (c == '\033') { + *q++ = '\\'; + *q++ = 'e'; + } + else { + *q++ = '\\'; + sprintf(q, "%03o", c&0xff); + q += 3; + } + } + *q++ = '"'; + + OBJ_INFECT(result, str); + return result; +} + + +/* + * call-seq: + * str.upcase! => str or nil + * + * Upcases the contents of <i>str</i>, returning <code>nil</code> if no changes + * were made. + */ + +static VALUE +rb_str_upcase_bang(str) + VALUE str; +{ + char *s, *send; + int modify = 0; + + rb_str_modify(str); + s = RSTRING(str)->ptr; send = s + RSTRING(str)->len; + while (s < send) { + if (ismbchar(*s)) { + s+=mbclen(*s) - 1; + } + else if (ISLOWER(*s)) { + *s = toupper(*s); + modify = 1; + } + s++; + } + + if (modify) return str; + return Qnil; +} + + +/* + * call-seq: + * str.upcase => new_str + * + * Returns a copy of <i>str</i> with all lowercase letters replaced with their + * uppercase counterparts. The operation is locale insensitive---only + * characters ``a'' to ``z'' are affected. + * + * "hEllO".upcase #=> "HELLO" + */ + +static VALUE +rb_str_upcase(str) + VALUE str; +{ + str = rb_str_dup(str); + rb_str_upcase_bang(str); + return str; +} + + +/* + * call-seq: + * str.downcase! 
=> str or nil + * + * Downcases the contents of <i>str</i>, returning <code>nil</code> if no + * changes were made. + */ + +static VALUE +rb_str_downcase_bang(str) + VALUE str; +{ + char *s, *send; + int modify = 0; + + rb_str_modify(str); + s = RSTRING(str)->ptr; send = s + RSTRING(str)->len; + while (s < send) { + if (ismbchar(*s)) { + s+=mbclen(*s) - 1; + } + else if (ISUPPER(*s)) { + *s = tolower(*s); + modify = 1; + } + s++; + } + + if (modify) return str; + return Qnil; +} + + +/* + * call-seq: + * str.downcase => new_str + * + * Returns a copy of <i>str</i> with all uppercase letters replaced with their + * lowercase counterparts. The operation is locale insensitive---only + * characters ``A'' to ``Z'' are affected. + * + * "hEllO".downcase #=> "hello" + */ + +static VALUE +rb_str_downcase(str) + VALUE str; +{ + str = rb_str_dup(str); + rb_str_downcase_bang(str); + return str; +} + + +/* + * call-seq: + * str.capitalize! => str or nil + * + * Modifies <i>str</i> by converting the first character to uppercase and the + * remainder to lowercase. Returns <code>nil</code> if no changes are made. + * + * a = "hello" + * a.capitalize! #=> "Hello" + * a #=> "Hello" + * a.capitalize! #=> nil + */ + +static VALUE +rb_str_capitalize_bang(str) + VALUE str; +{ + char *s, *send; + int modify = 0; + + rb_str_modify(str); + if (RSTRING(str)->len == 0 || !RSTRING(str)->ptr) return Qnil; + s = RSTRING(str)->ptr; send = s + RSTRING(str)->len; + if (ISLOWER(*s)) { + *s = toupper(*s); + modify = 1; + } + while (++s < send) { + if (ismbchar(*s)) { + s+=mbclen(*s) - 1; + } + else if (ISUPPER(*s)) { + *s = tolower(*s); + modify = 1; + } + } + if (modify) return str; + return Qnil; +} + + +/* + * call-seq: + * str.capitalize => new_str + * + * Returns a copy of <i>str</i> with the first character converted to uppercase + * and the remainder to lowercase. 
+ * + * "hello".capitalize #=> "Hello" + * "HELLO".capitalize #=> "Hello" + * "123ABC".capitalize #=> "123abc" + */ + +static VALUE +rb_str_capitalize(str) + VALUE str; +{ + str = rb_str_dup(str); + rb_str_capitalize_bang(str); + return str; +} + + +/* + * call-seq: + * str.swapcase! => str or nil + * + * Equivalent to <code>String#swapcase</code>, but modifies the receiver in + * place, returning <i>str</i>, or <code>nil</code> if no changes were made. + */ + +static VALUE +rb_str_swapcase_bang(str) + VALUE str; +{ + char *s, *send; + int modify = 0; + + rb_str_modify(str); + s = RSTRING(str)->ptr; send = s + RSTRING(str)->len; + while (s < send) { + if (ismbchar(*s)) { + s+=mbclen(*s) - 1; + } + else if (ISUPPER(*s)) { + *s = tolower(*s); + modify = 1; + } + else if (ISLOWER(*s)) { + *s = toupper(*s); + modify = 1; + } + s++; + } + + if (modify) return str; + return Qnil; +} + + +/* + * call-seq: + * str.swapcase => new_str + * + * Returns a copy of <i>str</i> with uppercase alphabetic characters converted + * to lowercase and lowercase characters converted to uppercase. 
+ * + * "Hello".swapcase #=> "hELLO" + * "cYbEr_PuNk11".swapcase #=> "CyBeR_pUnK11" + */ + +static VALUE +rb_str_swapcase(str) + VALUE str; +{ + str = rb_str_dup(str); + rb_str_swapcase_bang(str); + return str; +} + +typedef unsigned char *USTR; + +struct tr { + int gen, now, max; + char *p, *pend; +}; + +static int +trnext(t) + struct tr *t; +{ + for (;;) { + if (!t->gen) { + if (t->p == t->pend) return -1; + if (t->p < t->pend - 1 && *t->p == '\\') { + t->p++; + } + t->now = *(USTR)t->p++; + if (t->p < t->pend - 1 && *t->p == '-') { + t->p++; + if (t->p < t->pend) { + if (t->now > *(USTR)t->p) { + t->p++; + continue; + } + t->gen = 1; + t->max = *(USTR)t->p++; + } + } + return t->now; + } + else if (++t->now < t->max) { + return t->now; + } + else { + t->gen = 0; + return t->max; + } + } +} + +static VALUE rb_str_delete_bang _((int,VALUE*,VALUE)); + +static VALUE +tr_trans(str, src, repl, sflag) + VALUE str, src, repl; + int sflag; +{ + struct tr trsrc, trrepl; + int cflag = 0; + int trans[256]; + int i, c, modify = 0; + char *s, *send; + + StringValue(src); + StringValue(repl); + if (RSTRING(str)->len == 0 || !RSTRING(str)->ptr) return Qnil; + trsrc.p = RSTRING(src)->ptr; trsrc.pend = trsrc.p + RSTRING(src)->len; + if (RSTRING(src)->len >= 2 && RSTRING(src)->ptr[0] == '^') { + cflag++; + trsrc.p++; + } + if (RSTRING(repl)->len == 0) { + return rb_str_delete_bang(1, &src, str); + } + trrepl.p = RSTRING(repl)->ptr; + trrepl.pend = trrepl.p + RSTRING(repl)->len; + trsrc.gen = trrepl.gen = 0; + trsrc.now = trrepl.now = 0; + trsrc.max = trrepl.max = 0; + + if (cflag) { + for (i=0; i<256; i++) { + trans[i] = 1; + } + while ((c = trnext(&trsrc)) >= 0) { + trans[c & 0xff] = -1; + } + while ((c = trnext(&trrepl)) >= 0) + /* retrieve last replacer */; + for (i=0; i<256; i++) { + if (trans[i] >= 0) { + trans[i] = trrepl.now; + } + } + } + else { + int r; + + for (i=0; i<256; i++) { + trans[i] = -1; + } + while ((c = trnext(&trsrc)) >= 0) { + r = trnext(&trrepl); + if (r == 
-1) r = trrepl.now; + trans[c & 0xff] = r; + } + } + + rb_str_modify(str); + s = RSTRING(str)->ptr; send = s + RSTRING(str)->len; + if (sflag) { + char *t = s; + int c0, last = -1; + + while (s < send) { + c0 = *s++; + if ((c = trans[c0 & 0xff]) >= 0) { + if (last == c) continue; + last = c; + *t++ = c & 0xff; + modify = 1; + } + else { + last = -1; + *t++ = c0; + } + } + if (RSTRING(str)->len > (t - RSTRING(str)->ptr)) { + RSTRING(str)->len = (t - RSTRING(str)->ptr); + modify = 1; + *t = '\0'; + } + } + else { + while (s < send) { + if ((c = trans[*s & 0xff]) >= 0) { + *s = c & 0xff; + modify = 1; + } + s++; + } + } + + if (modify) return str; + return Qnil; +} + + +/* + * call-seq: + * str.tr!(from_str, to_str) => str or nil + * + * Translates <i>str</i> in place, using the same rules as + * <code>String#tr</code>. Returns <i>str</i>, or <code>nil</code> if no + * changes were made. + */ + +static VALUE +rb_str_tr_bang(str, src, repl) + VALUE str, src, repl; +{ + return tr_trans(str, src, repl, 0); +} + + +/* + * call-seq: + * str.tr(from_str, to_str) => new_str + * + * Returns a copy of <i>str</i> with the characters in <i>from_str</i> replaced + * by the corresponding characters in <i>to_str</i>. If <i>to_str</i> is + * shorter than <i>from_str</i>, it is padded with its last character. Both + * strings may use the c1--c2 notation to denote ranges of characters, and + * <i>from_str</i> may start with a <code>^</code>, which denotes all + * characters except those listed. 
+ * + * "hello".tr('aeiou', '*') #=> "h*ll*" + * "hello".tr('^aeiou', '*') #=> "*e**o" + * "hello".tr('el', 'ip') #=> "hippo" + * "hello".tr('a-y', 'b-z') #=> "ifmmp" + */ + +static VALUE +rb_str_tr(str, src, repl) + VALUE str, src, repl; +{ + str = rb_str_dup(str); + tr_trans(str, src, repl, 0); + return str; +} + +static void +tr_setup_table(str, table, init) + VALUE str; + char table[256]; + int init; +{ + char buf[256]; + struct tr tr; + int i, c; + int cflag = 0; + + tr.p = RSTRING(str)->ptr; tr.pend = tr.p + RSTRING(str)->len; + tr.gen = tr.now = tr.max = 0; + if (RSTRING(str)->len > 1 && RSTRING(str)->ptr[0] == '^') { + cflag = 1; + tr.p++; + } + + if (init) { + for (i=0; i<256; i++) { + table[i] = 1; + } + } + for (i=0; i<256; i++) { + buf[i] = cflag; + } + while ((c = trnext(&tr)) >= 0) { + buf[c & 0xff] = !cflag; + } + for (i=0; i<256; i++) { + table[i] = table[i] && buf[i]; + } +} + + +/* + * call-seq: + * str.delete!([other_str]+>) => str or nil + * + * Performs a <code>delete</code> operation in place, returning <i>str</i>, or + * <code>nil</code> if <i>str</i> was not modified. + */ + +static VALUE +rb_str_delete_bang(argc, argv, str) + int argc; + VALUE *argv; + VALUE str; +{ + char *s, *send, *t; + char squeez[256]; + int modify = 0; + int init = 1; + int i; + + if (argc < 1) { + rb_raise(rb_eArgError, "wrong number of arguments"); + } + for (i=0; i<argc; i++) { + VALUE s = argv[i]; + + StringValue(s); + tr_setup_table(s, squeez, init); + init = 0; + } + + rb_str_modify(str); + s = t = RSTRING(str)->ptr; + if (!s || RSTRING(str)->len == 0) return Qnil; + send = s + RSTRING(str)->len; + while (s < send) { + if (squeez[*s & 0xff]) + modify = 1; + else + *t++ = *s; + s++; + } + *t = '\0'; + RSTRING(str)->len = t - RSTRING(str)->ptr; + + if (modify) return str; + return Qnil; +} + + +/* + * call-seq: + * str.delete([other_str]+) => new_str + * + * Returns a copy of <i>str</i> with all characters in the intersection of its + * arguments deleted. 
Uses the same rules for building the set of characters as + * <code>String#count</code>. + * + * "hello".delete "l","lo" #=> "heo" + * "hello".delete "lo" #=> "he" + * "hello".delete "aeiou", "^e" #=> "hell" + * "hello".delete "ej-m" #=> "ho" + */ + +static VALUE +rb_str_delete(argc, argv, str) + int argc; + VALUE *argv; + VALUE str; +{ + str = rb_str_dup(str); + rb_str_delete_bang(argc, argv, str); + return str; +} + + +/* + * call-seq: + * str.squeeze!([other_str]*) => str or nil + * + * Squeezes <i>str</i> in place, returning either <i>str</i>, or + * <code>nil</code> if no changes were made. + */ + +static VALUE +rb_str_squeeze_bang(argc, argv, str) + int argc; + VALUE *argv; + VALUE str; +{ + char squeez[256]; + char *s, *send, *t; + int c, save, modify = 0; + int init = 1; + int i; + + if (argc == 0) { + for (i=0; i<256; i++) { + squeez[i] = 1; + } + } + else { + for (i=0; i<argc; i++) { + VALUE s = argv[i]; + + StringValue(s); + tr_setup_table(s, squeez, init); + init = 0; + } + } + + rb_str_modify(str); + s = t = RSTRING(str)->ptr; + if (!s || RSTRING(str)->len == 0) return Qnil; + send = s + RSTRING(str)->len; + save = -1; + while (s < send) { + c = *s++ & 0xff; + if (c != save || !squeez[c]) { + *t++ = save = c; + } + } + *t = '\0'; + if (t - RSTRING(str)->ptr != RSTRING(str)->len) { + RSTRING(str)->len = t - RSTRING(str)->ptr; + modify = 1; + } + + if (modify) return str; + return Qnil; +} + + +/* + * call-seq: + * str.squeeze([other_str]*) => new_str + * + * Builds a set of characters from the <i>other_str</i> parameter(s) using the + * procedure described for <code>String#count</code>. Returns a new string + * where runs of the same character that occur in this set are replaced by a + * single character. If no arguments are given, all runs of identical + * characters are replaced by a single character. 
+ * + * "yellow moon".squeeze #=> "yelow mon" + * " now is the".squeeze(" ") #=> " now is the" + * "putters shoot balls".squeeze("m-z") #=> "puters shot balls" + */ + +static VALUE +rb_str_squeeze(argc, argv, str) + int argc; + VALUE *argv; + VALUE str; +{ + str = rb_str_dup(str); + rb_str_squeeze_bang(argc, argv, str); + return str; +} + + +/* + * call-seq: + * str.tr_s!(from_str, to_str) => str or nil + * + * Performs <code>String#tr_s</code> processing on <i>str</i> in place, + * returning <i>str</i>, or <code>nil</code> if no changes were made. + */ + +static VALUE +rb_str_tr_s_bang(str, src, repl) + VALUE str, src, repl; +{ + return tr_trans(str, src, repl, 1); +} + + +/* + * call-seq: + * str.tr_s(from_str, to_str) => new_str + * + * Processes a copy of <i>str</i> as described under <code>String#tr</code>, + * then removes duplicate characters in regions that were affected by the + * translation. + * + * "hello".tr_s('l', 'r') #=> "hero" + * "hello".tr_s('el', '*') #=> "h*o" + * "hello".tr_s('el', 'hx') #=> "hhxo" + */ + +static VALUE +rb_str_tr_s(str, src, repl) + VALUE str, src, repl; +{ + str = rb_str_dup(str); + tr_trans(str, src, repl, 1); + return str; +} + + +/* + * call-seq: + * str.count([other_str]+) => fixnum + * + * Each <i>other_str</i> parameter defines a set of characters to count. The + * intersection of these sets defines the characters to count in + * <i>str</i>. Any <i>other_str</i> that starts with a caret (^) is + * negated. The sequence c1--c2 means all characters between c1 and c2. 
+ * + * a = "hello world" + * a.count "lo" #=> 5 + * a.count "lo", "o" #=> 2 + * a.count "hello", "^l" #=> 4 + * a.count "ej-m" #=> 4 + */ + +static VALUE +rb_str_count(argc, argv, str) + int argc; + VALUE *argv; + VALUE str; +{ + char table[256]; + char *s, *send; + int init = 1; + int i; + + if (argc < 1) { + rb_raise(rb_eArgError, "wrong number of arguments"); + } + for (i=0; i<argc; i++) { + VALUE s = argv[i]; + + StringValue(s); + tr_setup_table(s, table, init); + init = 0; + } + + s = RSTRING(str)->ptr; + if (!s || RSTRING(str)->len == 0) return INT2FIX(0); + send = s + RSTRING(str)->len; + i = 0; + while (s < send) { + if (table[*s++ & 0xff]) { + i++; + } + } + return INT2NUM(i); +} + + +/* + * call-seq: + * str.split(pattern=$;, [limit]) => anArray + * + * Divides <i>str</i> into substrings based on a delimiter, returning an array + * of these substrings. + * + * If <i>pattern</i> is a <code>String</code>, then its contents are used as + * the delimiter when splitting <i>str</i>. If <i>pattern</i> is a single + * space, <i>str</i> is split on whitespace, with leading whitespace and runs + * of contiguous whitespace characters ignored. + * + * If <i>pattern</i> is a <code>Regexp</code>, <i>str</i> is divided where the + * pattern matches. Whenever the pattern matches a zero-length string, + * <i>str</i> is split into individual characters. + * + * If <i>pattern</i> is omitted, the value of <code>$;</code> is used. If + * <code>$;</code> is <code>nil</code> (which is the default), <i>str</i> is + * split on whitespace as if ` ' were specified. + * + * If the <i>limit</i> parameter is omitted, trailing null fields are + * suppressed. If <i>limit</i> is a positive number, at most that number of + * fields will be returned (if <i>limit</i> is <code>1</code>, the entire + * string is returned as the only entry in an array). If negative, there is no + * limit to the number of fields returned, and trailing null fields are not + * suppressed. 
+ * + * " now's the time".split #=> ["now's", "the", "time"] + * " now's the time".split(' ') #=> ["now's", "the", "time"] + * " now's the time".split(/ /) #=> ["", "now's", "", "the", "time"] + * "1, 2.34,56, 7".split(%r{,\s*}) #=> ["1", "2.34", "56", "7"] + * "hello".split(//) #=> ["h", "e", "l", "l", "o"] + * "hello".split(//, 3) #=> ["h", "e", "llo"] + * "hi mom".split(%r{\s*}) #=> ["h", "i", "m", "o", "m"] + * + * "mellow yellow".split("ello") #=> ["m", "w y", "w"] + * "1,2,,3,4,,".split(',') #=> ["1", "2", "", "3", "4"] + * "1,2,,3,4,,".split(',', 4) #=> ["1", "2", "", "3,4,,"] + * "1,2,,3,4,,".split(',', -4) #=> ["1", "2", "", "3", "4", "", ""] + */ + +static VALUE +rb_str_split_m(argc, argv, str) + int argc; + VALUE *argv; + VALUE str; +{ + VALUE spat; + VALUE limit; + int awk_split = Qfalse; + long beg, end, i = 0; + int lim = 0; + VALUE result, tmp; + + if (rb_scan_args(argc, argv, "02", &spat, &limit) == 2) { + lim = NUM2INT(limit); + if (lim <= 0) limit = Qnil; + else if (lim == 1) { + if (RSTRING(str)->len == 0) + return rb_ary_new2(0); + return rb_ary_new3(1, str); + } + i = 1; + } + + if (NIL_P(spat)) { + if (!NIL_P(rb_fs)) { + spat = rb_fs; + goto fs_set; + } + awk_split = Qtrue; + } + else { + fs_set: + if (TYPE(spat) == T_STRING && RSTRING(spat)->len == 1) { + if (RSTRING(spat)->ptr[0] == ' ') { + awk_split = Qtrue; + } + else { + spat = rb_reg_regcomp(rb_reg_quote(spat)); + } + } + else { + spat = get_pat(spat, 1); + } + } + + result = rb_ary_new(); + beg = 0; + if (awk_split) { + char *ptr = RSTRING(str)->ptr; + long len = RSTRING(str)->len; + char *eptr = ptr + len; + int skip = 1; + + for (end = beg = 0; ptr<eptr; ptr++) { + if (skip) { + if (ISSPACE(*ptr)) { + beg++; + } + else { + end = beg+1; + skip = 0; + if (!NIL_P(limit) && lim <= i) break; + } + } + else { + if (ISSPACE(*ptr)) { + rb_ary_push(result, rb_str_substr(str, beg, end-beg)); + skip = 1; + beg = end + 1; + if (!NIL_P(limit)) ++i; + } + else { + end++; + } + } + } + } + else { + 
long start = beg; + long idx; + int last_null = 0; + struct re_registers *regs; + + while ((end = rb_reg_search(spat, str, start, 0)) >= 0) { + regs = RMATCH(rb_backref_get())->regs; + if (start == end && BEG(0) == END(0)) { + if (!RSTRING(str)->ptr) { + rb_ary_push(result, rb_str_new("", 0)); + break; + } + else if (last_null == 1) { + rb_ary_push(result, rb_str_substr(str, beg, mbclen2(RSTRING(str)->ptr[beg],spat))); + beg = start; + } + else { + start += mbclen2(RSTRING(str)->ptr[start],spat); + last_null = 1; + continue; + } + } + else { + rb_ary_push(result, rb_str_substr(str, beg, end-beg)); + beg = start = END(0); + } + last_null = 0; + + for (idx=1; idx < regs->num_regs; idx++) { + if (BEG(idx) == -1) continue; + if (BEG(idx) == END(idx)) + tmp = rb_str_new5(str, 0, 0); + else + tmp = rb_str_substr(str, BEG(idx), END(idx)-BEG(idx)); + rb_ary_push(result, tmp); + } + if (!NIL_P(limit) && lim <= ++i) break; + } + } + if (RSTRING(str)->len > 0 && (!NIL_P(limit) || RSTRING(str)->len > beg || lim < 0)) { + if (RSTRING(str)->len == beg) + tmp = rb_str_new5(str, 0, 0); + else + tmp = rb_str_substr(str, beg, RSTRING(str)->len-beg); + rb_ary_push(result, tmp); + } + if (NIL_P(limit) && lim == 0) { + while (RARRAY(result)->len > 0 && + RSTRING(RARRAY(result)->ptr[RARRAY(result)->len-1])->len == 0) + rb_ary_pop(result); + } + + return result; +} + +VALUE +rb_str_split(str, sep0) + VALUE str; + const char *sep0; +{ + VALUE sep; + + StringValue(str); + sep = rb_str_new2(sep0); + return rb_str_split_m(1, &sep, str); +} + +/* + * call-seq: + * split([pattern [, limit]]) => array + * + * Equivalent to <code>$_.split(<i>pattern</i>, <i>limit</i>)</code>. + * See <code>String#split</code>. 
+ */ + +static VALUE +rb_f_split(argc, argv) + int argc; + VALUE *argv; +{ + return rb_str_split_m(argc, argv, uscore_get()); +} + +/* + * call-seq: + * str.each(separator=$/) {|substr| block } => str + * str.each_line(separator=$/) {|substr| block } => str + * + * Splits <i>str</i> using the supplied parameter as the record separator + * (<code>$/</code> by default), passing each substring in turn to the supplied + * block. If a zero-length record separator is supplied, the string is split on + * <code>\n</code> characters, except that multiple successive newlines are + * appended together. + * + * print "Example one\n" + * "hello\nworld".each {|s| p s} + * print "Example two\n" + * "hello\nworld".each('l') {|s| p s} + * print "Example three\n" + * "hello\n\n\nworld".each('') {|s| p s} + * + * <em>produces:</em> + * + * Example one + * "hello\n" + * "world" + * Example two + * "hel" + * "l" + * "o\nworl" + * "d" + * Example three + * "hello\n\n\n" + * "world" + */ + +static VALUE +rb_str_each_line(argc, argv, str) + int argc; + VALUE *argv; + VALUE str; +{ + VALUE rs; + int newline; + char *p = RSTRING(str)->ptr, *pend = p + RSTRING(str)->len, *s; + char *ptr = p; + long len = RSTRING(str)->len, rslen; + VALUE line; + + if (rb_scan_args(argc, argv, "01", &rs) == 0) { + rs = rb_rs; + } + + if (NIL_P(rs)) { + rb_yield(str); + return str; + } + StringValue(rs); + rslen = RSTRING(rs)->len; + if (rslen == 0) { + newline = '\n'; + } + else { + newline = RSTRING(rs)->ptr[rslen-1]; + } + + for (s = p, p += rslen; p < pend; p++) { + if (rslen == 0 && *p == '\n') { + if (*++p != '\n') continue; + while (*p == '\n') p++; + } + if (RSTRING(str)->ptr < p && p[-1] == newline && + (rslen <= 1 || + rb_memcmp(RSTRING(rs)->ptr, p-rslen, rslen) == 0)) { + line = rb_str_new5(str, s, p - s); + OBJ_INFECT(line, str); + rb_yield(line); + str_mod_check(str, ptr, len); + s = p; + } + } + + if (s != pend) { + if (p > pend) p = pend; + line = rb_str_new5(str, s, p - s); + OBJ_INFECT(line, 
str); + rb_yield(line); + } + + return str; +} + + +/* + * call-seq: + * str.each_byte {|fixnum| block } => str + * + * Passes each byte in <i>str</i> to the given block. + * + * "hello".each_byte {|c| print c, ' ' } + * + * <em>produces:</em> + * + * 104 101 108 108 111 + */ + +static VALUE +rb_str_each_byte(str) + VALUE str; +{ + long i; + + for (i=0; i<RSTRING(str)->len; i++) { + rb_yield(INT2FIX(RSTRING(str)->ptr[i] & 0xff)); + } + return str; +} + + +/* + * call-seq: + * str.chop! => str or nil + * + * Processes <i>str</i> as for <code>String#chop</code>, returning <i>str</i>, + * or <code>nil</code> if <i>str</i> is the empty string. See also + * <code>String#chomp!</code>. + */ + +static VALUE +rb_str_chop_bang(str) + VALUE str; +{ + if (RSTRING(str)->len > 0) { + rb_str_modify(str); + RSTRING(str)->len--; + if (RSTRING(str)->ptr[RSTRING(str)->len] == '\n') { + if (RSTRING(str)->len > 0 && + RSTRING(str)->ptr[RSTRING(str)->len-1] == '\r') { + RSTRING(str)->len--; + } + } + RSTRING(str)->ptr[RSTRING(str)->len] = '\0'; + return str; + } + return Qnil; +} + + +/* + * call-seq: + * str.chop => new_str + * + * Returns a new <code>String</code> with the last character removed. If the + * string ends with <code>\r\n</code>, both characters are removed. Applying + * <code>chop</code> to an empty string returns an empty + * string. <code>String#chomp</code> is often a safer alternative, as it leaves + * the string unchanged if it doesn't end in a record separator. + * + * "string\r\n".chop #=> "string" + * "string\n\r".chop #=> "string\n" + * "string\n".chop #=> "string" + * "string".chop #=> "strin" + * "x".chop.chop #=> "" + */ + +static VALUE +rb_str_chop(str) + VALUE str; +{ + str = rb_str_dup(str); + rb_str_chop_bang(str); + return str; +} + + +/* + * call-seq: + * chop! => $_ or nil + * + * Equivalent to <code>$_.chop!</code>. + * + * a = "now\r\n" + * $_ = a + * chop! #=> "now" + * chop! #=> "no" + * chop! #=> "n" + * chop! #=> "" + * chop! 
#=> nil + * $_ #=> "" + * a #=> "" + */ + +static VALUE +rb_f_chop_bang(str) + VALUE str; +{ + return rb_str_chop_bang(uscore_get()); +} + +/* + * call-seq: + * chop => string + * + * Equivalent to <code>($_.dup).chop!</code>, except <code>nil</code> + * is never returned. See <code>String#chop!</code>. + * + * a = "now\r\n" + * $_ = a + * chop #=> "now" + * $_ #=> "now" + * chop #=> "no" + * chop #=> "n" + * chop #=> "" + * chop #=> "" + * a #=> "now\r\n" + */ + +static VALUE +rb_f_chop() +{ + VALUE str = uscore_get(); + + if (RSTRING(str)->len > 0) { + str = rb_str_dup(str); + rb_str_chop_bang(str); + rb_lastline_set(str); + } + return str; +} + + +/* + * call-seq: + * str.chomp!(separator=$/) => str or nil + * + * Modifies <i>str</i> in place as described for <code>String#chomp</code>, + * returning <i>str</i>, or <code>nil</code> if no modifications were made. + */ + +static VALUE +rb_str_chomp_bang(argc, argv, str) + int argc; + VALUE *argv; + VALUE str; +{ + VALUE rs; + int newline; + char *p; + long len, rslen; + + if (rb_scan_args(argc, argv, "01", &rs) == 0) { + len = RSTRING(str)->len; + if (len == 0) return Qnil; + p = RSTRING(str)->ptr; + rs = rb_rs; + if (rs == rb_default_rs) { + smart_chomp: + rb_str_modify(str); + if (RSTRING(str)->ptr[len-1] == '\n') { + RSTRING(str)->len--; + if (RSTRING(str)->len > 0 && + RSTRING(str)->ptr[RSTRING(str)->len-1] == '\r') { + RSTRING(str)->len--; + } + } + else if (RSTRING(str)->ptr[len-1] == '\r') { + RSTRING(str)->len--; + } + else { + return Qnil; + } + RSTRING(str)->ptr[RSTRING(str)->len] = '\0'; + return str; + } + } + if (NIL_P(rs)) return Qnil; + StringValue(rs); + len = RSTRING(str)->len; + if (len == 0) return Qnil; + p = RSTRING(str)->ptr; + rslen = RSTRING(rs)->len; + if (rslen == 0) { + while (len>0 && p[len-1] == '\n') { + len--; + if (len>0 && p[len-1] == '\r') + len--; + } + if (len < RSTRING(str)->len) { + rb_str_modify(str); + RSTRING(str)->len = len; + RSTRING(str)->ptr[len] = '\0'; + return str; + 
} + return Qnil; + } + if (rslen > len) return Qnil; + newline = RSTRING(rs)->ptr[rslen-1]; + if (rslen == 1 && newline == '\n') + goto smart_chomp; + + if (p[len-1] == newline && + (rslen <= 1 || + rb_memcmp(RSTRING(rs)->ptr, p+len-rslen, rslen) == 0)) { + rb_str_modify(str); + RSTRING(str)->len -= rslen; + RSTRING(str)->ptr[RSTRING(str)->len] = '\0'; + return str; + } + return Qnil; +} + + +/* + * call-seq: + * str.chomp(separator=$/) => new_str + * + * Returns a new <code>String</code> with the given record separator removed + * from the end of <i>str</i> (if present). If <code>$/</code> has not been + * changed from the default Ruby record separator, then <code>chomp</code> also + * removes carriage return characters (that is it will remove <code>\n</code>, + * <code>\r</code>, and <code>\r\n</code>). + * + * "hello".chomp #=> "hello" + * "hello\n".chomp #=> "hello" + * "hello\r\n".chomp #=> "hello" + * "hello\n\r".chomp #=> "hello\n" + * "hello\r".chomp #=> "hello" + * "hello \n there".chomp #=> "hello \n there" + * "hello".chomp("llo") #=> "he" + */ + +static VALUE +rb_str_chomp(argc, argv, str) + int argc; + VALUE *argv; + VALUE str; +{ + str = rb_str_dup(str); + rb_str_chomp_bang(argc, argv, str); + return str; +} + +/* + * call-seq: + * chomp! => $_ or nil + * chomp!(string) => $_ or nil + * + * Equivalent to <code>$_.chomp!(<em>string</em>)</code>. See + * <code>String#chomp!</code> + * + * $_ = "now\n" + * chomp! #=> "now" + * $_ #=> "now" + * chomp! "x" #=> nil + * $_ #=> "now" + */ + +static VALUE +rb_f_chomp_bang(argc, argv) + int argc; + VALUE *argv; +{ + return rb_str_chomp_bang(argc, argv, uscore_get()); +} + +/* + * call-seq: + * chomp => $_ + * chomp(string) => $_ + * + * Equivalent to <code>$_ = $_.chomp(<em>string</em>)</code>. See + * <code>String#chomp</code>. 
+ * + * $_ = "now\n" + * chomp #=> "now" + * $_ #=> "now" + * chomp "ow" #=> "n" + * $_ #=> "n" + * chomp "xxx" #=> "n" + * $_ #=> "n" + */ + +static VALUE +rb_f_chomp(argc, argv) + int argc; + VALUE *argv; +{ + VALUE str = uscore_get(); + VALUE dup = rb_str_dup(str); + + if (NIL_P(rb_str_chomp_bang(argc, argv, dup))) + return str; + rb_lastline_set(dup); + return dup; +} + + +/* + * call-seq: + * str.lstrip! => self or nil + * + * Removes leading whitespace from <i>str</i>, returning <code>nil</code> if no + * change was made. See also <code>String#rstrip!</code> and + * <code>String#strip!</code>. + * + * " hello ".lstrip #=> "hello " + * "hello".lstrip! #=> nil + */ + +static VALUE +rb_str_lstrip_bang(str) + VALUE str; +{ + char *s, *t, *e; + + s = RSTRING(str)->ptr; + if (!s || RSTRING(str)->len == 0) return Qnil; + e = t = s + RSTRING(str)->len; + /* remove spaces at head */ + while (s < t && ISSPACE(*s)) s++; + + if (s > RSTRING(str)->ptr) { + rb_str_modify(str); + RSTRING(str)->len = t-s; + memmove(RSTRING(str)->ptr, s, RSTRING(str)->len); + RSTRING(str)->ptr[RSTRING(str)->len] = '\0'; + return str; + } + return Qnil; +} + + +/* + * call-seq: + * str.lstrip => new_str + * + * Returns a copy of <i>str</i> with leading whitespace removed. See also + * <code>String#rstrip</code> and <code>String#strip</code>. + * + * " hello ".lstrip #=> "hello " + * "hello".lstrip #=> "hello" + */ + +static VALUE +rb_str_lstrip(str) + VALUE str; +{ + str = rb_str_dup(str); + rb_str_lstrip_bang(str); + return str; +} + + +/* + * call-seq: + * str.rstrip! => self or nil + * + * Removes trailing whitespace from <i>str</i>, returning <code>nil</code> if + * no change was made. See also <code>String#lstrip!</code> and + * <code>String#strip!</code>. + * + * " hello ".rstrip #=> " hello" + * "hello".rstrip! 
#=> nil + */ + +static VALUE +rb_str_rstrip_bang(str) + VALUE str; +{ + char *s, *t, *e; + + s = RSTRING(str)->ptr; + if (!s || RSTRING(str)->len == 0) return Qnil; + e = t = s + RSTRING(str)->len; + + /* remove trailing '\0's */ + while (s < t && t[-1] == '\0') t--; + + /* remove trailing spaces */ + while (s < t && ISSPACE(*(t-1))) t--; + + if (t < e) { + rb_str_modify(str); + RSTRING(str)->len = t-s; + RSTRING(str)->ptr[RSTRING(str)->len] = '\0'; + return str; + } + return Qnil; +} + + +/* + * call-seq: + * str.rstrip => new_str + * + * Returns a copy of <i>str</i> with trailing whitespace removed. See also + * <code>String#lstrip</code> and <code>String#strip</code>. + * + * " hello ".rstrip #=> " hello" + * "hello".rstrip #=> "hello" + */ + +static VALUE +rb_str_rstrip(str) + VALUE str; +{ + str = rb_str_dup(str); + rb_str_rstrip_bang(str); + return str; +} + + +/* + * call-seq: + * str.strip! => str or nil + * + * Removes leading and trailing whitespace from <i>str</i>. Returns + * <code>nil</code> if <i>str</i> was not altered. + */ + +static VALUE +rb_str_strip_bang(str) + VALUE str; +{ + VALUE l = rb_str_lstrip_bang(str); + VALUE r = rb_str_rstrip_bang(str); + + if (NIL_P(l) && NIL_P(r)) return Qnil; + return str; +} + + +/* + * call-seq: + * str.strip => new_str + * + * Returns a copy of <i>str</i> with leading and trailing whitespace removed. 
+ * + * " hello ".strip #=> "hello" + * "\tgoodbye\r\n".strip #=> "goodbye" + */ + +static VALUE +rb_str_strip(str) + VALUE str; +{ + str = rb_str_dup(str); + rb_str_strip_bang(str); + return str; +} + +static VALUE +scan_once(str, pat, start) + VALUE str, pat; + long *start; +{ + VALUE result, match; + struct re_registers *regs; + long i; + + if (rb_reg_search(pat, str, *start, 0) >= 0) { + match = rb_backref_get(); + regs = RMATCH(match)->regs; + if (BEG(0) == END(0)) { + /* + * Always consume at least one character of the input string + */ + if (RSTRING(str)->len < END(0)) + *start = END(0)+mbclen2(RSTRING(str)->ptr[END(0)],pat); + else + *start = END(0)+1; + } + else { + *start = END(0); + } + if (regs->num_regs == 1) { + return rb_reg_nth_match(0, match); + } + result = rb_ary_new2(regs->num_regs); + for (i=1; i < regs->num_regs; i++) { + rb_ary_push(result, rb_reg_nth_match(i, match)); + } + + return result; + } + return Qnil; +} + + +/* + * call-seq: + * str.scan(pattern) => array + * str.scan(pattern) {|match, ...| block } => str + * + * Both forms iterate through <i>str</i>, matching the pattern (which may be a + * <code>Regexp</code> or a <code>String</code>). For each match, a result is + * generated and either added to the result array or passed to the block. If + * the pattern contains no groups, each individual result consists of the + * matched string, <code>$&</code>. If the pattern contains groups, each + * individual result is itself an array containing one entry per group. 
+ * + * a = "cruel world" + * a.scan(/\w+/) #=> ["cruel", "world"] + * a.scan(/.../) #=> ["cru", "el ", "wor"] + * a.scan(/(...)/) #=> [["cru"], ["el "], ["wor"]] + * a.scan(/(..)(..)/) #=> [["cr", "ue"], ["l ", "wo"]] + * + * And the block form: + * + * a.scan(/\w+/) {|w| print "<<#{w}>> " } + * print "\n" + * a.scan(/(.)(.)/) {|a,b| print b, a } + * print "\n" + * + * <em>produces:</em> + * + * <<cruel>> <<world>> + * rceu lowlr + */ + +static VALUE +rb_str_scan(str, pat) + VALUE str, pat; +{ + VALUE result; + long start = 0; + VALUE match = Qnil; + + pat = get_pat(pat, 1); + if (!rb_block_given_p()) { + VALUE ary = rb_ary_new(); + + while (!NIL_P(result = scan_once(str, pat, &start))) { + match = rb_backref_get(); + rb_ary_push(ary, result); + } + rb_backref_set(match); + return ary; + } + + while (!NIL_P(result = scan_once(str, pat, &start))) { + match = rb_backref_get(); + rb_match_busy(match); + rb_yield(result); + rb_backref_set(match); /* restore $~ value */ + } + rb_backref_set(match); + return str; +} + +/* + * call-seq: + * scan(pattern) => array + * scan(pattern) {|///| block } => $_ + * + * Equivalent to calling <code>$_.scan</code>. See + * <code>String#scan</code>. + */ + +static VALUE +rb_f_scan(self, pat) + VALUE self, pat; +{ + return rb_str_scan(uscore_get(), pat); +} + + +/* + * call-seq: + * str.hex => integer + * + * Treats leading characters from <i>str</i> as a string of hexadecimal digits + * (with an optional sign and an optional <code>0x</code>) and returns the + * corresponding number. Zero is returned on error. + * + * "0x0a".hex #=> 10 + * "-1234".hex #=> -4660 + * "0".hex #=> 0 + * "wombat".hex #=> 0 + */ + +static VALUE +rb_str_hex(str) + VALUE str; +{ + return rb_str_to_inum(str, 16, Qfalse); +} + + +/* + * call-seq: + * str.oct => integer + * + * Treats leading characters of <i>str</i> as a string of octal digits (with an + * optional sign) and returns the corresponding number. Returns 0 if the + * conversion fails. 
+ * + * "123".oct #=> 83 + * "-377".oct #=> -255 + * "bad".oct #=> 0 + * "0377bad".oct #=> 255 + */ + +static VALUE +rb_str_oct(str) + VALUE str; +{ + return rb_str_to_inum(str, -8, Qfalse); +} + + +/* + * call-seq: + * str.crypt(other_str) => new_str + * + * Applies a one-way cryptographic hash to <i>str</i> by invoking the standard + * library function <code>crypt</code>. The argument is the salt string, which + * should be two characters long, each character drawn from + * <code>[a-zA-Z0-9./]</code>. + */ + +static VALUE +rb_str_crypt(str, salt) + VALUE str, salt; +{ + extern char *crypt(); + VALUE result; + char *s; + + StringValue(salt); + if (RSTRING(salt)->len < 2) + rb_raise(rb_eArgError, "salt too short (need >=2 bytes)"); + + if (RSTRING(str)->ptr) s = RSTRING(str)->ptr; + else s = ""; + result = rb_str_new2(crypt(s, RSTRING(salt)->ptr)); + OBJ_INFECT(result, str); + OBJ_INFECT(result, salt); + return result; +} + + +/* + * call-seq: + * str.intern => symbol + * str.to_sym => symbol + * + * Returns the <code>Symbol</code> corresponding to <i>str</i>, creating the + * symbol if it did not previously exist. See <code>Symbol#id2name</code>. + * + * "Koala".intern #=> :Koala + * s = 'cat'.to_sym #=> :cat + * s == :cat #=> true + * s = '@cat'.to_sym #=> :@cat + * s == :@cat #=> true + * + * This can also be used to create symbols that cannot be represented using the + * <code>:xxx</code> notation. 
+ * + * 'cat and dog'.to_sym #=> :"cat and dog" + */ + +VALUE +rb_str_intern(s) + VALUE s; +{ + volatile VALUE str = s; + ID id; + + if (!RSTRING(str)->ptr || RSTRING(str)->len == 0) { + rb_raise(rb_eArgError, "interning empty string"); + } + if (strlen(RSTRING(str)->ptr) != RSTRING(str)->len) + rb_raise(rb_eArgError, "symbol string may not contain `\\0'"); + id = rb_intern(RSTRING(str)->ptr); + return ID2SYM(id); +} + + +/* + * call-seq: + * str.sum(n=16) => integer + * + * Returns a basic <em>n</em>-bit checksum of the characters in <i>str</i>, + * where <em>n</em> is the optional <code>Fixnum</code> parameter, defaulting + * to 16. The result is simply the sum of the binary value of each character in + * <i>str</i> modulo <code>2n - 1</code>. This is not a particularly good + * checksum. + */ + +static VALUE +rb_str_sum(argc, argv, str) + int argc; + VALUE *argv; + VALUE str; +{ + VALUE vbits; + int bits; + char *ptr, *p, *pend; + long len; + + if (rb_scan_args(argc, argv, "01", &vbits) == 0) { + bits = 16; + } + else bits = NUM2INT(vbits); + + ptr = p = RSTRING(str)->ptr; + len = RSTRING(str)->len; + pend = p + len; + if (bits >= sizeof(long)*CHAR_BIT) { + VALUE sum = INT2FIX(0); + + while (p < pend) { + str_mod_check(str, ptr, len); + sum = rb_funcall(sum, '+', 1, INT2FIX((unsigned char)*p)); + p++; + } + if (bits != 0) { + VALUE mod; + + mod = rb_funcall(INT2FIX(1), rb_intern("<<"), 1, INT2FIX(bits)); + mod = rb_funcall(mod, '-', 1, INT2FIX(1)); + sum = rb_funcall(sum, '&', 1, mod); + } + return sum; + } + else { + unsigned long sum = 0; + + while (p < pend) { + str_mod_check(str, ptr, len); + sum += (unsigned char)*p; + p++; + } + if (bits != 0) { + sum &= (((unsigned long)1)<<bits)-1; + } + return rb_int2inum(sum); + } +} + +static VALUE +rb_str_justify(argc, argv, str, jflag) + int argc; + VALUE *argv; + VALUE str; + char jflag; +{ + VALUE w; + long width, flen = 0; + VALUE res; + char *p, *pend, *f = " "; + long n; + VALUE pad; + + rb_scan_args(argc, 
argv, "11", &w, &pad); + width = NUM2LONG(w); + if (argc == 2) { + StringValue(pad); + f = RSTRING(pad)->ptr; + flen = RSTRING(pad)->len; + if (flen == 0) { + rb_raise(rb_eArgError, "zero width padding"); + } + } + if (width < 0 || RSTRING(str)->len >= width) return rb_str_dup(str); + res = rb_str_new5(str, 0, width); + p = RSTRING(res)->ptr; + if (jflag != 'l') { + n = width - RSTRING(str)->len; + pend = p + ((jflag == 'r') ? n : n/2); + if (flen <= 1) { + while (p < pend) { + *p++ = *f; + } + } + else { + char *q = f; + while (p + flen <= pend) { + memcpy(p,f,flen); + p += flen; + } + while (p < pend) { + *p++ = *q++; + } + } + } + memcpy(p, RSTRING(str)->ptr, RSTRING(str)->len); + if (jflag != 'r') { + p += RSTRING(str)->len; pend = RSTRING(res)->ptr + width; + if (flen <= 1) { + while (p < pend) { + *p++ = *f; + } + } + else { + while (p + flen <= pend) { + memcpy(p,f,flen); + p += flen; + } + while (p < pend) { + *p++ = *f++; + } + } + } + OBJ_INFECT(res, str); + if (flen > 0) OBJ_INFECT(res, pad); + return res; +} + + +/* + * call-seq: + * str.ljust(integer, padstr=' ') => new_str + * + * If <i>integer</i> is greater than the length of <i>str</i>, returns a new + * <code>String</code> of length <i>integer</i> with <i>str</i> left justified + * and padded with <i>padstr</i>; otherwise, returns <i>str</i>. + * + * "hello".ljust(4) #=> "hello" + * "hello".ljust(20) #=> "hello " + * "hello".ljust(20, '1234') #=> "hello123412341234123" + */ + +static VALUE +rb_str_ljust(argc, argv, str) + int argc; + VALUE *argv; + VALUE str; +{ + return rb_str_justify(argc, argv, str, 'l'); +} + + +/* + * call-seq: + * str.rjust(integer, padstr=' ') => new_str + * + * If <i>integer</i> is greater than the length of <i>str</i>, returns a new + * <code>String</code> of length <i>integer</i> with <i>str</i> right justified + * and padded with <i>padstr</i>; otherwise, returns <i>str</i>. 
+ * + * "hello".rjust(4) #=> "hello" + * "hello".rjust(20) #=> " hello" + * "hello".rjust(20, '1234') #=> "123412341234123hello" + */ + +static VALUE +rb_str_rjust(argc, argv, str) + int argc; + VALUE *argv; + VALUE str; +{ + return rb_str_justify(argc, argv, str, 'r'); +} + + +/* + * call-seq: + * str.center(integer, padstr) => new_str + * + * If <i>integer</i> is greater than the length of <i>str</i>, returns a new + * <code>String</code> of length <i>integer</i> with <i>str</i> centered and + * padded with <i>padstr</i>; otherwise, returns <i>str</i>. + * + * "hello".center(4) #=> "hello" + * "hello".center(20) #=> " hello " + * "hello".center(20, '123') #=> "1231231hello12312312" + */ + +static VALUE +rb_str_center(argc, argv, str) + int argc; + VALUE *argv; + VALUE str; +{ + return rb_str_justify(argc, argv, str, 'c'); +} + +void +rb_str_setter(val, id, var) + VALUE val; + ID id; + VALUE *var; +{ + if (!NIL_P(val) && TYPE(val) != T_STRING) { + rb_raise(rb_eTypeError, "value of %s must be String", rb_id2name(id)); + } + *var = val; +} + + +/* + * A <code>String</code> object holds and manipulates an arbitrary sequence of + * bytes, typically representing characters. String objects may be created + * using <code>String::new</code> or as literals. + * + * Because of aliasing issues, users of strings should be aware of the methods + * that modify the contents of a <code>String</code> object. Typically, + * methods with names ending in ``!'' modify their receiver, while those + * without a ``!'' return a new <code>String</code>. However, there are + * exceptions, such as <code>String#[]=</code>. 
+ * + */ + +void +Init_String() +{ + rb_cString = rb_define_class("String", rb_cObject); + rb_include_module(rb_cString, rb_mComparable); + rb_include_module(rb_cString, rb_mEnumerable); + rb_define_alloc_func(rb_cString, str_alloc); + rb_define_method(rb_cString, "initialize", rb_str_init, -1); + rb_define_method(rb_cString, "initialize_copy", rb_str_replace, 1); + rb_define_method(rb_cString, "<=>", rb_str_cmp_m, 1); + rb_define_method(rb_cString, "==", rb_str_equal, 1); + rb_define_method(rb_cString, "eql?", rb_str_eql, 1); + rb_define_method(rb_cString, "hash", rb_str_hash_m, 0); + rb_define_method(rb_cString, "casecmp", rb_str_casecmp, 1); + rb_define_method(rb_cString, "+", rb_str_plus, 1); + rb_define_method(rb_cString, "*", rb_str_times, 1); + rb_define_method(rb_cString, "%", rb_str_format, 1); + rb_define_method(rb_cString, "[]", rb_str_aref_m, -1); + rb_define_method(rb_cString, "[]=", rb_str_aset_m, -1); + rb_define_method(rb_cString, "insert", rb_str_insert, 2); + rb_define_method(rb_cString, "length", rb_str_length, 0); + rb_define_method(rb_cString, "size", rb_str_length, 0); + rb_define_method(rb_cString, "empty?", rb_str_empty, 0); + rb_define_method(rb_cString, "=~", rb_str_match, 1); + rb_define_method(rb_cString, "match", rb_str_match_m, -1); + rb_define_method(rb_cString, "succ", rb_str_succ, 0); + rb_define_method(rb_cString, "succ!", rb_str_succ_bang, 0); + rb_define_method(rb_cString, "next", rb_str_succ, 0); + rb_define_method(rb_cString, "next!", rb_str_succ_bang, 0); + rb_define_method(rb_cString, "upto", rb_str_upto_m, 1); + rb_define_method(rb_cString, "index", rb_str_index_m, -1); + rb_define_method(rb_cString, "rindex", rb_str_rindex_m, -1); + rb_define_method(rb_cString, "replace", rb_str_replace, 1); + rb_define_method(rb_cString, "clear", rb_str_clear, 0); + + rb_define_method(rb_cString, "to_i", rb_str_to_i, -1); + rb_define_method(rb_cString, "to_f", rb_str_to_f, 0); + rb_define_method(rb_cString, "to_s", rb_str_to_s, 0); + 
rb_define_method(rb_cString, "to_str", rb_str_to_s, 0); + rb_define_method(rb_cString, "inspect", rb_str_inspect, 0); + rb_define_method(rb_cString, "dump", rb_str_dump, 0); + + rb_define_method(rb_cString, "upcase", rb_str_upcase, 0); + rb_define_method(rb_cString, "downcase", rb_str_downcase, 0); + rb_define_method(rb_cString, "capitalize", rb_str_capitalize, 0); + rb_define_method(rb_cString, "swapcase", rb_str_swapcase, 0); + + rb_define_method(rb_cString, "upcase!", rb_str_upcase_bang, 0); + rb_define_method(rb_cString, "downcase!", rb_str_downcase_bang, 0); + rb_define_method(rb_cString, "capitalize!", rb_str_capitalize_bang, 0); + rb_define_method(rb_cString, "swapcase!", rb_str_swapcase_bang, 0); + + rb_define_method(rb_cString, "hex", rb_str_hex, 0); + rb_define_method(rb_cString, "oct", rb_str_oct, 0); + rb_define_method(rb_cString, "split", rb_str_split_m, -1); + rb_define_method(rb_cString, "reverse", rb_str_reverse, 0); + rb_define_method(rb_cString, "reverse!", rb_str_reverse_bang, 0); + rb_define_method(rb_cString, "concat", rb_str_concat, 1); + rb_define_method(rb_cString, "<<", rb_str_concat, 1); + rb_define_method(rb_cString, "crypt", rb_str_crypt, 1); + rb_define_method(rb_cString, "intern", rb_str_intern, 0); + rb_define_method(rb_cString, "to_sym", rb_str_intern, 0); + + rb_define_method(rb_cString, "include?", rb_str_include, 1); + + rb_define_method(rb_cString, "scan", rb_str_scan, 1); + + rb_define_method(rb_cString, "ljust", rb_str_ljust, -1); + rb_define_method(rb_cString, "rjust", rb_str_rjust, -1); + rb_define_method(rb_cString, "center", rb_str_center, -1); + + rb_define_method(rb_cString, "sub", rb_str_sub, -1); + rb_define_method(rb_cString, "gsub", rb_str_gsub, -1); + rb_define_method(rb_cString, "chop", rb_str_chop, 0); + rb_define_method(rb_cString, "chomp", rb_str_chomp, -1); + rb_define_method(rb_cString, "strip", rb_str_strip, 0); + rb_define_method(rb_cString, "lstrip", rb_str_lstrip, 0); + rb_define_method(rb_cString, 
"rstrip", rb_str_rstrip, 0); + + rb_define_method(rb_cString, "sub!", rb_str_sub_bang, -1); + rb_define_method(rb_cString, "gsub!", rb_str_gsub_bang, -1); + rb_define_method(rb_cString, "chop!", rb_str_chop_bang, 0); + rb_define_method(rb_cString, "chomp!", rb_str_chomp_bang, -1); + rb_define_method(rb_cString, "strip!", rb_str_strip_bang, 0); + rb_define_method(rb_cString, "lstrip!", rb_str_lstrip_bang, 0); + rb_define_method(rb_cString, "rstrip!", rb_str_rstrip_bang, 0); + + rb_define_method(rb_cString, "tr", rb_str_tr, 2); + rb_define_method(rb_cString, "tr_s", rb_str_tr_s, 2); + rb_define_method(rb_cString, "delete", rb_str_delete, -1); + rb_define_method(rb_cString, "squeeze", rb_str_squeeze, -1); + rb_define_method(rb_cString, "count", rb_str_count, -1); + + rb_define_method(rb_cString, "tr!", rb_str_tr_bang, 2); + rb_define_method(rb_cString, "tr_s!", rb_str_tr_s_bang, 2); + rb_define_method(rb_cString, "delete!", rb_str_delete_bang, -1); + rb_define_method(rb_cString, "squeeze!", rb_str_squeeze_bang, -1); + + rb_define_method(rb_cString, "each_line", rb_str_each_line, -1); + rb_define_method(rb_cString, "each", rb_str_each_line, -1); + rb_define_method(rb_cString, "each_byte", rb_str_each_byte, 0); + + rb_define_method(rb_cString, "sum", rb_str_sum, -1); + + rb_define_global_function("sub", rb_f_sub, -1); + rb_define_global_function("gsub", rb_f_gsub, -1); + + rb_define_global_function("sub!", rb_f_sub_bang, -1); + rb_define_global_function("gsub!", rb_f_gsub_bang, -1); + + rb_define_global_function("chop", rb_f_chop, 0); + rb_define_global_function("chop!", rb_f_chop_bang, 0); + + rb_define_global_function("chomp", rb_f_chomp, -1); + rb_define_global_function("chomp!", rb_f_chomp_bang, -1); + + rb_define_global_function("split", rb_f_split, -1); + rb_define_global_function("scan", rb_f_scan, 1); + + rb_define_method(rb_cString, "slice", rb_str_aref_m, -1); + rb_define_method(rb_cString, "slice!", rb_str_slice_bang, -1); + + id_to_s = rb_intern("to_s"); + + 
rb_fs = Qnil; + rb_define_variable("$;", &rb_fs); + rb_define_variable("$-F", &rb_fs); +} +/********************************************************************** + + struct.c - + + $Author: matz $ + $Date: 2005/04/18 06:38:30 $ + created at: Tue Mar 22 18:44:30 JST 1995 + + Copyright (C) 1993-2003 Yukihiro Matsumoto + +**********************************************************************/ + +#include "ruby.h" + +VALUE rb_cStruct; + +static VALUE struct_alloc _((VALUE)); + +VALUE +rb_struct_iv_get(c, name) + VALUE c; + char *name; +{ + ID id; + + id = rb_intern(name); + for (;;) { + if (rb_ivar_defined(c, id)) + return rb_ivar_get(c, id); + c = RCLASS(c)->super; + if (c == 0 || c == rb_cStruct) + return Qnil; + } +} + +VALUE +rb_struct_s_members(klass) + VALUE klass; +{ + VALUE members = rb_struct_iv_get(klass, "__members__"); + + if (NIL_P(members)) { + rb_bug("non-initialized struct"); + } + return members; +} + +VALUE +rb_struct_members(s) + VALUE s; +{ + VALUE members = rb_struct_s_members(rb_obj_class(s)); + + if (RSTRUCT(s)->len != RARRAY(members)->len) { + rb_raise(rb_eTypeError, "struct size differs (%d required %d given)", + RARRAY(members)->len, RSTRUCT(s)->len); + } + return members; +} + +static VALUE +rb_struct_s_members_m(klass) + VALUE klass; +{ + VALUE members, ary; + VALUE *p, *pend; + + members = rb_struct_s_members(klass); + ary = rb_ary_new2(RARRAY(members)->len); + p = RARRAY(members)->ptr; pend = p + RARRAY(members)->len; + while (p < pend) { + rb_ary_push(ary, rb_str_new2(rb_id2name(SYM2ID(*p)))); + p++; + } + + return ary; +} + +/* + * call-seq: + * struct.members => array + * + * Returns an array of strings representing the names of the instance + * variables. 
+ * + * Customer = Struct.new(:name, :address, :zip) + * joe = Customer.new("Joe Smith", "123 Maple, Anytown NC", 12345) + * joe.members #=> ["name", "address", "zip"] + */ + +static VALUE +rb_struct_members_m(obj) + VALUE obj; +{ + return rb_struct_s_members_m(rb_obj_class(obj)); +} + +VALUE +rb_struct_getmember(obj, id) + VALUE obj; + ID id; +{ + VALUE members, slot; + long i; + + members = rb_struct_members(obj); + slot = ID2SYM(id); + for (i=0; i<RARRAY(members)->len; i++) { + if (RARRAY(members)->ptr[i] == slot) { + return RSTRUCT(obj)->ptr[i]; + } + } + rb_name_error(id, "%s is not struct member", rb_id2name(id)); + return Qnil; /* not reached */ +} + +static VALUE +rb_struct_ref(obj) + VALUE obj; +{ + return rb_struct_getmember(obj, rb_frame_this_func()); +} + +static VALUE rb_struct_ref0(obj) VALUE obj; {return RSTRUCT(obj)->ptr[0];} +static VALUE rb_struct_ref1(obj) VALUE obj; {return RSTRUCT(obj)->ptr[1];} +static VALUE rb_struct_ref2(obj) VALUE obj; {return RSTRUCT(obj)->ptr[2];} +static VALUE rb_struct_ref3(obj) VALUE obj; {return RSTRUCT(obj)->ptr[3];} +static VALUE rb_struct_ref4(obj) VALUE obj; {return RSTRUCT(obj)->ptr[4];} +static VALUE rb_struct_ref5(obj) VALUE obj; {return RSTRUCT(obj)->ptr[5];} +static VALUE rb_struct_ref6(obj) VALUE obj; {return RSTRUCT(obj)->ptr[6];} +static VALUE rb_struct_ref7(obj) VALUE obj; {return RSTRUCT(obj)->ptr[7];} +static VALUE rb_struct_ref8(obj) VALUE obj; {return RSTRUCT(obj)->ptr[8];} +static VALUE rb_struct_ref9(obj) VALUE obj; {return RSTRUCT(obj)->ptr[9];} + +static VALUE (*ref_func[10])() = { + rb_struct_ref0, + rb_struct_ref1, + rb_struct_ref2, + rb_struct_ref3, + rb_struct_ref4, + rb_struct_ref5, + rb_struct_ref6, + rb_struct_ref7, + rb_struct_ref8, + rb_struct_ref9, +}; + +static void +rb_struct_modify(s) + VALUE s; +{ + if (OBJ_FROZEN(s)) rb_error_frozen("Struct"); + if (!OBJ_TAINTED(s) && rb_safe_level() >= 4) + rb_raise(rb_eSecurityError, "Insecure: can't modify Struct"); +} + +static VALUE 
+rb_struct_set(obj, val) + VALUE obj, val; +{ + VALUE members, slot; + long i; + + members = rb_struct_members(obj); + rb_struct_modify(obj); + for (i=0; i<RARRAY(members)->len; i++) { + slot = RARRAY(members)->ptr[i]; + if (rb_id_attrset(SYM2ID(slot)) == rb_frame_this_func()) { + return RSTRUCT(obj)->ptr[i] = val; + } + } + rb_name_error(rb_frame_this_func(), "`%s' is not a struct member", + rb_id2name(rb_frame_this_func())); + return Qnil; /* not reached */ +} + +static VALUE +make_struct(name, members, klass) + VALUE name, members, klass; +{ + VALUE nstr; + ID id; + long i; + + OBJ_FREEZE(members); + if (NIL_P(name)) { + nstr = rb_class_new(klass); + rb_make_metaclass(nstr, RBASIC(klass)->klass); + rb_class_inherited(klass, nstr); + } + else { + char *cname = StringValuePtr(name); + + id = rb_intern(cname); + if (!rb_is_const_id(id)) { + rb_name_error(id, "identifier %s needs to be constant", cname); + } + if (rb_const_defined_at(klass, id)) { + rb_warn("redefining constant Struct::%s", cname); + rb_mod_remove_const(klass, ID2SYM(id)); + } + nstr = rb_define_class_under(klass, rb_id2name(id), klass); + } + rb_iv_set(nstr, "__size__", LONG2NUM(RARRAY(members)->len)); + rb_iv_set(nstr, "__members__", members); + + rb_define_alloc_func(nstr, struct_alloc); + rb_define_singleton_method(nstr, "new", rb_class_new_instance, -1); + rb_define_singleton_method(nstr, "[]", rb_class_new_instance, -1); + rb_define_singleton_method(nstr, "members", rb_struct_s_members_m, 0); + for (i=0; i< RARRAY(members)->len; i++) { + ID id = SYM2ID(RARRAY(members)->ptr[i]); + if (rb_is_local_id(id) || rb_is_const_id(id)) { + if (i<sizeof(ref_func)) { + rb_define_method_id(nstr, id, ref_func[i], 0); + } + else { + rb_define_method_id(nstr, id, rb_struct_ref, 0); + } + rb_define_method_id(nstr, rb_id_attrset(id), rb_struct_set, 1); + } + } + + return nstr; +} + +#ifdef HAVE_STDARG_PROTOTYPES +#include <stdarg.h> +#define va_init_list(a,b) va_start(a,b) +#else +#include <varargs.h> +#define 
va_init_list(a,b) va_start(a) +#endif + +VALUE +#ifdef HAVE_STDARG_PROTOTYPES +rb_struct_define(const char *name, ...) +#else +rb_struct_define(name, va_alist) + const char *name; + va_dcl +#endif +{ + va_list ar; + VALUE nm, ary; + char *mem; + + if (!name) nm = Qnil; + else nm = rb_str_new2(name); + ary = rb_ary_new(); + + va_init_list(ar, name); + while (mem = va_arg(ar, char*)) { + ID slot = rb_intern(mem); + rb_ary_push(ary, ID2SYM(slot)); + } + va_end(ar); + + return make_struct(nm, ary, rb_cStruct); +} + +/* + * call-seq: + * Struct.new( [aString] [, aSym]+> ) => StructClass + * StructClass.new(arg, ...) => obj + * StructClass[arg, ...] => obj + * + * Creates a new class, named by <i>aString</i>, containing accessor + * methods for the given symbols. If the name <i>aString</i> is + * omitted, an anonymous structure class will be created. Otherwise, + * the name of this struct will appear as a constant in class + * <code>Struct</code>, so it must be unique for all + * <code>Struct</code>s in the system and should start with a capital + * letter. Assigning a structure class to a constant effectively gives + * the class the name of the constant. + * + * <code>Struct::new</code> returns a new <code>Class</code> object, + * which can then be used to create specific instances of the new + * structure. The number of actual parameters must be + * less than or equal to the number of attributes defined for this + * class; unset parameters default to \nil{}. Passing too many + * parameters will raise an \E{ArgumentError}. + * + * The remaining methods listed in this section (class and instance) + * are defined for this generated class. 
+ * + * # Create a structure with a name in Struct + * Struct.new("Customer", :name, :address) #=> Struct::Customer + * Struct::Customer.new("Dave", "123 Main") #=> #<Struct::Customer name="Dave", address="123 Main"> + * + * # Create a structure named by its constant + * Customer = Struct.new(:name, :address) #=> Customer + * Customer.new("Dave", "123 Main") #=> #<Customer name="Dave", address="123 Main"> + */ + +static VALUE +rb_struct_s_def(argc, argv, klass) + int argc; + VALUE *argv; + VALUE klass; +{ + VALUE name, rest; + long i; + VALUE st; + ID id; + + rb_scan_args(argc, argv, "1*", &name, &rest); + for (i=0; i<RARRAY(rest)->len; i++) { + id = rb_to_id(RARRAY(rest)->ptr[i]); + RARRAY(rest)->ptr[i] = ID2SYM(id); + } + if (!NIL_P(name)) { + VALUE tmp = rb_check_string_type(name); + + if (NIL_P(tmp)) { + id = rb_to_id(name); + rb_ary_unshift(rest, ID2SYM(id)); + name = Qnil; + } + } + st = make_struct(name, rest, klass); + if (rb_block_given_p()) { + rb_mod_module_eval(0, 0, st); + } + + return st; +} + +/* + */ + +static VALUE +rb_struct_initialize(self, values) + VALUE self, values; +{ + VALUE klass = rb_obj_class(self); + VALUE size; + long n; + + rb_struct_modify(self); + size = rb_struct_iv_get(klass, "__size__"); + n = FIX2LONG(size); + if (n < RARRAY(values)->len) { + rb_raise(rb_eArgError, "struct size differs"); + } + MEMCPY(RSTRUCT(self)->ptr, RARRAY(values)->ptr, VALUE, RARRAY(values)->len); + if (n > RARRAY(values)->len) { + rb_mem_clear(RSTRUCT(self)->ptr+RARRAY(values)->len, + n-RARRAY(values)->len); + } + return Qnil; +} + +static VALUE +struct_alloc(klass) + VALUE klass; +{ + VALUE size; + long n; + NEWOBJ(st, struct RStruct); + OBJSETUP(st, klass, T_STRUCT); + + size = rb_struct_iv_get(klass, "__size__"); + n = FIX2LONG(size); + + st->ptr = ALLOC_N(VALUE, n); + rb_mem_clear(st->ptr, n); + st->len = n; + + return (VALUE)st; +} + +VALUE +rb_struct_alloc(klass, values) + VALUE klass, values; +{ + return rb_class_new_instance(RARRAY(values)->len, 
RARRAY(values)->ptr, klass); +} + +VALUE +#ifdef HAVE_STDARG_PROTOTYPES +rb_struct_new(VALUE klass, ...) +#else +rb_struct_new(klass, va_alist) + VALUE klass; + va_dcl +#endif +{ + VALUE sz, *mem; + long size, i; + va_list args; + + sz = rb_struct_iv_get(klass, "__size__"); + size = FIX2LONG(sz); + mem = ALLOCA_N(VALUE, size); + va_init_list(args, klass); + for (i=0; i<size; i++) { + mem[i] = va_arg(args, VALUE); + } + va_end(args); + + return rb_class_new_instance(size, mem, klass); +} + +/* + * call-seq: + * struct.each {|obj| block } => struct + * + * Calls <i>block</i> once for each instance variable, passing the + * value as a parameter. + * + * Customer = Struct.new(:name, :address, :zip) + * joe = Customer.new("Joe Smith", "123 Maple, Anytown NC", 12345) + * joe.each {|x| puts(x) } + * + * <em>produces:</em> + * + * Joe Smith + * 123 Maple, Anytown NC + * 12345 + */ + +static VALUE +rb_struct_each(s) + VALUE s; +{ + long i; + + for (i=0; i<RSTRUCT(s)->len; i++) { + rb_yield(RSTRUCT(s)->ptr[i]); + } + return s; +} + +/* + * call-seq: + * struct.each_pair {|sym, obj| block } => struct + * + * Calls <i>block</i> once for each instance variable, passing the name + * (as a symbol) and the value as parameters. 
+ * + * Customer = Struct.new(:name, :address, :zip) + * joe = Customer.new("Joe Smith", "123 Maple, Anytown NC", 12345) + * joe.each_pair {|name, value| puts("#{name} => #{value}") } + * + * <em>produces:</em> + * + * name => Joe Smith + * address => 123 Maple, Anytown NC + * zip => 12345 + */ + +static VALUE +rb_struct_each_pair(s) + VALUE s; +{ + VALUE members; + long i; + + members = rb_struct_members(s); + for (i=0; i<RSTRUCT(s)->len; i++) { + rb_yield_values(2, rb_ary_entry(members, i), RSTRUCT(s)->ptr[i]); + } + return s; +} + +static VALUE +inspect_struct(s, dummy, recur) + VALUE s, dummy; + int recur; +{ + char *cname = rb_class2name(rb_obj_class(s)); + VALUE str, members; + long i; + + if (recur) { + char *cname = rb_class2name(rb_obj_class(s)); + VALUE str = rb_str_new(0, strlen(cname) + 15); + + sprintf(RSTRING(str)->ptr, "#<struct %s:...>", cname); + RSTRING(str)->len = strlen(RSTRING(str)->ptr); + return str; + } + + members = rb_struct_members(s); + str = rb_str_buf_new2("#<struct "); + rb_str_cat2(str, cname); + rb_str_cat2(str, " "); + for (i=0; i<RSTRUCT(s)->len; i++) { + VALUE slot; + ID id; + char *p; + + if (i > 0) { + rb_str_cat2(str, ", "); + } + slot = RARRAY(members)->ptr[i]; + id = SYM2ID(slot); + if (rb_is_local_id(id) || rb_is_const_id(id)) { + p = rb_id2name(id); + rb_str_cat2(str, p); + } + else { + rb_str_append(str, rb_inspect(slot)); + } + rb_str_cat2(str, "="); + rb_str_append(str, rb_inspect(RSTRUCT(s)->ptr[i])); + } + rb_str_cat2(str, ">"); + OBJ_INFECT(str, s); + + return str; +} + +/* + * call-seq: + * struct.to_s => string + * struct.inspect => string + * + * Describe the contents of this struct in a string. + */ + +static VALUE +rb_struct_inspect(s) + VALUE s; +{ + return rb_exec_recursive(inspect_struct, s, 0); +} + +/* + * call-seq: + * struct.to_a => array + * struct.values => array + * + * Returns the values for this instance as an array. 
+ * + * Customer = Struct.new(:name, :address, :zip) + * joe = Customer.new("Joe Smith", "123 Maple, Anytown NC", 12345) + * joe.to_a[1] #=> "123 Maple, Anytown NC" + */ + +static VALUE +rb_struct_to_a(s) + VALUE s; +{ + return rb_ary_new4(RSTRUCT(s)->len, RSTRUCT(s)->ptr); +} + +/* :nodoc: */ +static VALUE +rb_struct_init_copy(copy, s) + VALUE copy, s; +{ + if (copy == s) return copy; + rb_check_frozen(copy); + if (!rb_obj_is_instance_of(s, rb_obj_class(copy))) { + rb_raise(rb_eTypeError, "wrong argument class"); + } + RSTRUCT(copy)->ptr = ALLOC_N(VALUE, RSTRUCT(s)->len); + RSTRUCT(copy)->len = RSTRUCT(s)->len; + MEMCPY(RSTRUCT(copy)->ptr, RSTRUCT(s)->ptr, VALUE, RSTRUCT(copy)->len); + + return copy; +} + +static VALUE +rb_struct_aref_id(s, id) + VALUE s; + ID id; +{ + VALUE members; + long i, len; + + members = rb_struct_members(s); + len = RARRAY(members)->len; + for (i=0; i<len; i++) { + if (SYM2ID(RARRAY(members)->ptr[i]) == id) { + return RSTRUCT(s)->ptr[i]; + } + } + rb_name_error(id, "no member '%s' in struct", rb_id2name(id)); + return Qnil; /* not reached */ +} + +/* + * call-seq: + * struct[symbol] => anObject + * struct[fixnum] => anObject + * + * Attribute Reference---Returns the value of the instance variable + * named by <i>symbol</i>, or indexed (0..length-1) by + * <i>fixnum</i>. Will raise <code>NameError</code> if the named + * variable does not exist, or <code>IndexError</code> if the index is + * out of range. 
+ * + * Customer = Struct.new(:name, :address, :zip) + * joe = Customer.new("Joe Smith", "123 Maple, Anytown NC", 12345) + * + * joe["name"] #=> "Joe Smith" + * joe[:name] #=> "Joe Smith" + * joe[0] #=> "Joe Smith" + */ + +VALUE +rb_struct_aref(s, idx) + VALUE s, idx; +{ + long i; + + if (TYPE(idx) == T_STRING || TYPE(idx) == T_SYMBOL) { + return rb_struct_aref_id(s, rb_to_id(idx)); + } + + i = NUM2LONG(idx); + if (i < 0) i = RSTRUCT(s)->len + i; + if (i < 0) + rb_raise(rb_eIndexError, "offset %ld too small for struct(size:%ld)", + i, RSTRUCT(s)->len); + if (RSTRUCT(s)->len <= i) + rb_raise(rb_eIndexError, "offset %ld too large for struct(size:%ld)", + i, RSTRUCT(s)->len); + return RSTRUCT(s)->ptr[i]; +} + +static VALUE +rb_struct_aset_id(s, id, val) + VALUE s, val; + ID id; +{ + VALUE members; + long i, len; + + members = rb_struct_members(s); + rb_struct_modify(s); + len = RARRAY(members)->len; + if (RSTRUCT(s)->len != RARRAY(members)->len) { + rb_raise(rb_eTypeError, "struct size differs (%d required %d given)", + RARRAY(members)->len, RSTRUCT(s)->len); + } + for (i=0; i<len; i++) { + if (SYM2ID(RARRAY(members)->ptr[i]) == id) { + RSTRUCT(s)->ptr[i] = val; + return val; + } + } + rb_name_error(id, "no member '%s' in struct", rb_id2name(id)); +} + +/* + * call-seq: + * struct[symbol] = obj => obj + * struct[fixnum] = obj => obj + * + * Attribute Assignment---Assigns to the instance variable named by + * <i>symbol</i> or <i>fixnum</i> the value <i>obj</i> and + * returns it. Will raise a <code>NameError</code> if the named + * variable does not exist, or an <code>IndexError</code> if the index + * is out of range. 
+ * + * Customer = Struct.new(:name, :address, :zip) + * joe = Customer.new("Joe Smith", "123 Maple, Anytown NC", 12345) + * + * joe["name"] = "Luke" + * joe[:zip] = "90210" + * + * joe.name #=> "Luke" + * joe.zip #=> "90210" + */ + +VALUE +rb_struct_aset(s, idx, val) + VALUE s, idx, val; +{ + long i; + + if (TYPE(idx) == T_STRING || TYPE(idx) == T_SYMBOL) { + return rb_struct_aset_id(s, rb_to_id(idx), val); + } + + i = NUM2LONG(idx); + if (i < 0) i = RSTRUCT(s)->len + i; + if (i < 0) { + rb_raise(rb_eIndexError, "offset %ld too small for struct(size:%ld)", + i, RSTRUCT(s)->len); + } + if (RSTRUCT(s)->len <= i) { + rb_raise(rb_eIndexError, "offset %ld too large for struct(size:%ld)", + i, RSTRUCT(s)->len); + } + rb_struct_modify(s); + return RSTRUCT(s)->ptr[i] = val; +} + +static VALUE struct_entry _((VALUE, long)); +static VALUE +struct_entry(s, n) + VALUE s; + long n; +{ + return rb_struct_aref(s, LONG2NUM(n)); +} + +/* + * call-seq: + * struct.values_at(selector,... ) => an_array + * + * Returns an array containing the elements in + * _self_ corresponding to the given selector(s). The selectors + * may be either integer indices or ranges. + * See also </code>.select<code>. + * + * a = %w{ a b c d e f } + * a.values_at(1, 3, 5) + * a.values_at(1, 3, 5, 7) + * a.values_at(-1, -3, -5, -7) + * a.values_at(1..3, 2...5) + */ + +static VALUE +rb_struct_values_at(argc, argv, s) + int argc; + VALUE *argv; + VALUE s; +{ + return rb_get_values_at(s, RSTRUCT(s)->len, argc, argv, struct_entry); +} + +/* + * call-seq: + * struct.select(fixnum, ... ) => array + * struct.select {|i| block } => array + * + * The first form returns an array containing the elements in + * <i>struct</i> corresponding to the given indices. The second + * form invokes the block passing in successive elements from + * <i>struct</i>, returning an array containing those elements + * for which the block returns a true value (equivalent to + * <code>Enumerable#select</code>). 
+ * + * Lots = Struct.new(:a, :b, :c, :d, :e, :f) + * l = Lots.new(11, 22, 33, 44, 55, 66) + * l.select(1, 3, 5) #=> [22, 44, 66] + * l.select(0, 2, 4) #=> [11, 33, 55] + * l.select(-1, -3, -5) #=> [66, 44, 22] + * l.select {|v| (v % 2).zero? } #=> [22, 44, 66] + */ + +static VALUE +rb_struct_select(argc, argv, s) + int argc; + VALUE *argv; + VALUE s; +{ + VALUE result; + long i; + + if (argc > 0) { + rb_raise(rb_eArgError, "wrong number of arguments (%d for 0)", argc); + } + result = rb_ary_new(); + for (i = 0; i < RSTRUCT(s)->len; i++) { + if (RTEST(rb_yield(RSTRUCT(s)->ptr[i]))) { + rb_ary_push(result, RSTRUCT(s)->ptr[i]); + } + } + + return result; +} + +/* + * call-seq: + * struct == other_struct => true or false + * + * Equality---Returns <code>true</code> if <i>other_struct</i> is + * equal to this one: they must be of the same class as generated by + * <code>Struct::new</code>, and the values of all instance variables + * must be equal (according to <code>Object#==</code>). + * + * Customer = Struct.new(:name, :address, :zip) + * joe = Customer.new("Joe Smith", "123 Maple, Anytown NC", 12345) + * joejr = Customer.new("Joe Smith", "123 Maple, Anytown NC", 12345) + * jane = Customer.new("Jane Doe", "456 Elm, Anytown NC", 12345) + * joe == joejr #=> true + * joe == jane #=> false + */ + +static VALUE +rb_struct_equal(s, s2) + VALUE s, s2; +{ + long i; + + if (s == s2) return Qtrue; + if (TYPE(s2) != T_STRUCT) return Qfalse; + if (rb_obj_class(s) != rb_obj_class(s2)) return Qfalse; + if (RSTRUCT(s)->len != RSTRUCT(s2)->len) { + rb_bug("inconsistent struct"); /* should never happen */ + } + + for (i=0; i<RSTRUCT(s)->len; i++) { + if (!rb_equal(RSTRUCT(s)->ptr[i], RSTRUCT(s2)->ptr[i])) return Qfalse; + } + return Qtrue; +} + +/* + * call-seq: + * struct.hash => fixnum + * + * Return a hash value based on this struct's contents. 
+ */ + +static VALUE +rb_struct_hash(s) + VALUE s; +{ + long i, h; + VALUE n; + + h = rb_hash(rb_obj_class(s)); + for (i = 0; i < RSTRUCT(s)->len; i++) { + h = (h << 1) | (h<0 ? 1 : 0); + n = rb_hash(RSTRUCT(s)->ptr[i]); + h ^= NUM2LONG(n); + } + return LONG2FIX(h); +} + +/* + * code-seq: + * struct.eql?(other) => true or false + * + * Two structures are equal if they are the same object, or if all their + * fields are equal (using <code>eql?</code>). + */ + +static VALUE +rb_struct_eql(s, s2) + VALUE s, s2; +{ + long i; + + if (s == s2) return Qtrue; + if (TYPE(s2) != T_STRUCT) return Qfalse; + if (rb_obj_class(s) != rb_obj_class(s2)) return Qfalse; + if (RSTRUCT(s)->len != RSTRUCT(s2)->len) { + rb_bug("inconsistent struct"); /* should never happen */ + } + + for (i=0; i<RSTRUCT(s)->len; i++) { + if (!rb_eql(RSTRUCT(s)->ptr[i], RSTRUCT(s2)->ptr[i])) return Qfalse; + } + return Qtrue; +} + +/* + * call-seq: + * struct.length => fixnum + * struct.size => fixnum + * + * Returns the number of instance variables. + * + * Customer = Struct.new(:name, :address, :zip) + * joe = Customer.new("Joe Smith", "123 Maple, Anytown NC", 12345) + * joe.length #=> 3 + */ + +static VALUE +rb_struct_size(s) + VALUE s; +{ + return LONG2FIX(RSTRUCT(s)->len); +} + +/* + * A <code>Struct</code> is a convenient way to bundle a number of + * attributes together, using accessor methods, without having to write + * an explicit class. + * + * The <code>Struct</code> class is a generator of specific classes, + * each one of which is defined to hold a set of variables and their + * accessors. In these examples, we'll call the generated class + * ``<i>Customer</i>Class,'' and we'll show an example instance of that + * class as ``<i>Customer</i>Inst.'' + * + * In the descriptions that follow, the parameter <i>symbol</i> refers + * to a symbol, which is either a quoted string or a + * <code>Symbol</code> (such as <code>:name</code>). 
+ */ +void +Init_Struct() +{ + rb_cStruct = rb_define_class("Struct", rb_cObject); + rb_include_module(rb_cStruct, rb_mEnumerable); + + rb_undef_alloc_func(rb_cStruct); + rb_define_singleton_method(rb_cStruct, "new", rb_struct_s_def, -1); + + rb_define_method(rb_cStruct, "initialize", rb_struct_initialize, -2); + rb_define_method(rb_cStruct, "initialize_copy", rb_struct_init_copy, 1); + + rb_define_method(rb_cStruct, "==", rb_struct_equal, 1); + rb_define_method(rb_cStruct, "eql?", rb_struct_eql, 1); + rb_define_method(rb_cStruct, "hash", rb_struct_hash, 0); + + rb_define_method(rb_cStruct, "to_s", rb_struct_inspect, 0); + rb_define_method(rb_cStruct, "inspect", rb_struct_inspect, 0); + rb_define_method(rb_cStruct, "to_a", rb_struct_to_a, 0); + rb_define_method(rb_cStruct, "values", rb_struct_to_a, 0); + rb_define_method(rb_cStruct, "size", rb_struct_size, 0); + rb_define_method(rb_cStruct, "length", rb_struct_size, 0); + + rb_define_method(rb_cStruct, "each", rb_struct_each, 0); + rb_define_method(rb_cStruct, "each_pair", rb_struct_each_pair, 0); + rb_define_method(rb_cStruct, "[]", rb_struct_aref, 1); + rb_define_method(rb_cStruct, "[]=", rb_struct_aset, 2); + rb_define_method(rb_cStruct, "select", rb_struct_select, -1); + rb_define_method(rb_cStruct, "values_at", rb_struct_values_at, -1); + + rb_define_method(rb_cStruct, "members", rb_struct_members_m, 0); +} +/********************************************************************** + + time.c - + + $Author: matz $ + $Date: 2005/03/04 06:47:41 $ + created at: Tue Dec 28 14:31:59 JST 1993 + + Copyright (C) 1993-2003 Yukihiro Matsumoto + +**********************************************************************/ + +#include "ruby.h" +#include <sys/types.h> +#include <time.h> + +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif + +#include <math.h> + +VALUE rb_cTime; + +struct time_object { + struct timeval tv; + struct tm tm; + int gmt; + int tm_got; +}; + +#define GetTimeval(obj, tobj) \ + Data_Get_Struct(obj, struct 
time_object, tobj) + +static void time_free _((void *)); + +static void +time_free(tobj) + void *tobj; +{ + if (tobj) free(tobj); +} + +static VALUE time_s_alloc _((VALUE)); +static VALUE +time_s_alloc(klass) + VALUE klass; +{ + VALUE obj; + struct time_object *tobj; + + obj = Data_Make_Struct(klass, struct time_object, 0, time_free, tobj); + tobj->tm_got=0; + tobj->tv.tv_sec = 0; + tobj->tv.tv_usec = 0; + + return obj; +} + +static void +time_modify(time) + VALUE time; +{ + rb_check_frozen(time); + if (!OBJ_TAINTED(time) && rb_safe_level() >= 4) + rb_raise(rb_eSecurityError, "Insecure: can't modify Time"); +} + +/* + * Document-method: now + * + * Synonym for <code>Time.new</code>. Returns a +Time+ object + * initialized tot he current system time. + */ + +/* + * call-seq: + * Time.new -> time + * + * Returns a <code>Time</code> object initialized to the current system + * time. <b>Note:</b> The object created will be created using the + * resolution available on your system clock, and so may include + * fractional seconds. 
+ * + * a = Time.new #=> Wed Apr 09 08:56:03 CDT 2003 + * b = Time.new #=> Wed Apr 09 08:56:03 CDT 2003 + * a == b #=> false + * "%.6f" % a.to_f #=> "1049896563.230740" + * "%.6f" % b.to_f #=> "1049896563.231466" + * + */ + +static VALUE +time_init(time) + VALUE time; +{ + struct time_object *tobj; + + time_modify(time); + GetTimeval(time, tobj); + tobj->tm_got=0; + tobj->tv.tv_sec = 0; + tobj->tv.tv_usec = 0; + if (gettimeofday(&tobj->tv, 0) < 0) { + rb_sys_fail("gettimeofday"); + } + + return time; +} + +#define NDIV(x,y) (-(-((x)+1)/(y))-1) +#define NMOD(x,y) ((y)-(-((x)+1)%(y))-1) + +void +time_overflow_p(secp, usecp) + time_t *secp, *usecp; +{ + time_t tmp, sec = *secp, usec = *usecp; + + if (usec >= 1000000) { /* usec positive overflow */ + tmp = sec + usec / 1000000; + usec %= 1000000; + if (sec > 0 && tmp < 0) { + rb_raise(rb_eRangeError, "out of Time range"); + } + sec = tmp; + } + if (usec < 0) { /* usec negative overflow */ + tmp = sec + NDIV(usec,1000000); /* negative div */ + usec = NMOD(usec,1000000); /* negative mod */ + if (sec < 0 && tmp > 0) { + rb_raise(rb_eRangeError, "out of Time range"); + } + sec = tmp; + } +#ifndef NEGATIVE_TIME_T + if (sec < 0 || (sec == 0 && usec < 0)) + rb_raise(rb_eArgError, "time must be positive"); +#endif + *secp = sec; + *usecp = usec; +} + +static VALUE +time_new_internal(klass, sec, usec) + VALUE klass; + time_t sec, usec; +{ + VALUE time = time_s_alloc(klass); + struct time_object *tobj; + + GetTimeval(time, tobj); + time_overflow_p(&sec, &usec); + tobj->tv.tv_sec = sec; + tobj->tv.tv_usec = usec; + + return time; +} + +VALUE +rb_time_new(sec, usec) + time_t sec, usec; +{ + return time_new_internal(rb_cTime, sec, usec); +} + +static struct timeval +time_timeval(time, interval) + VALUE time; + int interval; +{ + struct timeval t; + char *tstr = interval ? 
"time interval" : "time"; + +#ifndef NEGATIVE_TIME_T + interval = 1; +#endif + + switch (TYPE(time)) { + case T_FIXNUM: + t.tv_sec = FIX2LONG(time); + if (interval && t.tv_sec < 0) + rb_raise(rb_eArgError, "%s must be positive", tstr); + t.tv_usec = 0; + break; + + case T_FLOAT: + if (interval && RFLOAT(time)->value < 0.0) + rb_raise(rb_eArgError, "%s must be positive", tstr); + else { + double f, d; + + d = modf(RFLOAT(time)->value, &f); + t.tv_sec = (time_t)f; + if (f != t.tv_sec) { + rb_raise(rb_eRangeError, "%f out of Time range", RFLOAT(time)->value); + } + t.tv_usec = (time_t)(d*1e6); + } + break; + + case T_BIGNUM: + t.tv_sec = NUM2LONG(time); + if (interval && t.tv_sec < 0) + rb_raise(rb_eArgError, "%s must be positive", tstr); + t.tv_usec = 0; + break; + + default: + rb_raise(rb_eTypeError, "can't convert %s into %s", + rb_obj_classname(time), tstr); + break; + } + return t; +} + +struct timeval +rb_time_interval(time) + VALUE time; +{ + return time_timeval(time, Qtrue); +} + +struct timeval +rb_time_timeval(time) + VALUE time; +{ + struct time_object *tobj; + struct timeval t; + + if (TYPE(time) == T_DATA && RDATA(time)->dfree == time_free) { + GetTimeval(time, tobj); + t = tobj->tv; + return t; + } + return time_timeval(time, Qfalse); +} + +/* + * call-seq: + * Time.at( aTime ) => time + * Time.at( seconds [, microseconds] ) => time + * + * Creates a new time object with the value given by <i>aTime</i>, or + * the given number of <i>seconds</i> (and optional + * <i>microseconds</i>) from epoch. A non-portable feature allows the + * offset to be negative on some systems. 
+ * + * Time.at(0) #=> Wed Dec 31 18:00:00 CST 1969 + * Time.at(946702800) #=> Fri Dec 31 23:00:00 CST 1999 + * Time.at(-284061600) #=> Sat Dec 31 00:00:00 CST 1960 + */ + +static VALUE +time_s_at(argc, argv, klass) + int argc; + VALUE *argv; + VALUE klass; +{ + struct timeval tv; + VALUE time, t; + + if (rb_scan_args(argc, argv, "11", &time, &t) == 2) { + tv.tv_sec = NUM2LONG(time); + tv.tv_usec = NUM2LONG(t); + } + else { + tv = rb_time_timeval(time); + } + t = time_new_internal(klass, tv.tv_sec, tv.tv_usec); + if (TYPE(time) == T_DATA && RDATA(time)->dfree == time_free) { + struct time_object *tobj, *tobj2; + + GetTimeval(time, tobj); + GetTimeval(t, tobj2); + tobj2->gmt = tobj->gmt; + } + return t; +} + +static char *months [12] = { + "jan", "feb", "mar", "apr", "may", "jun", + "jul", "aug", "sep", "oct", "nov", "dec", +}; + +static long +obj2long(obj) + VALUE obj; +{ + if (TYPE(obj) == T_STRING) { + obj = rb_str_to_inum(obj, 10, Qfalse); + } + + return NUM2LONG(obj); +} + +static void +time_arg(argc, argv, tm, usec) + int argc; + VALUE *argv; + struct tm *tm; + time_t *usec; +{ + VALUE v[8]; + int i; + long year; + + MEMZERO(tm, struct tm, 1); + *usec = 0; + if (argc == 10) { + v[0] = argv[5]; + v[1] = argv[4]; + v[2] = argv[3]; + v[3] = argv[2]; + v[4] = argv[1]; + v[5] = argv[0]; + v[6] = Qnil; + tm->tm_isdst = RTEST(argv[8]) ? 
1 : 0; + } + else { + rb_scan_args(argc, argv, "17", &v[0],&v[1],&v[2],&v[3],&v[4],&v[5],&v[6],&v[7]); + /* v[6] may be usec or zone (parsedate) */ + /* v[7] is wday (parsedate; ignored) */ + tm->tm_wday = -1; + tm->tm_isdst = -1; + } + + year = obj2long(v[0]); + + if (0 <= year && year < 39) { + year += 100; + rb_warning("2 digits year is used"); + } + else if (69 <= year && year < 139) { + rb_warning("2 or 3 digits year is used"); + } + else { + year -= 1900; + } + + tm->tm_year = year; + + if (NIL_P(v[1])) { + tm->tm_mon = 0; + } + else { + VALUE s = rb_check_string_type(v[1]); + if (!NIL_P(s)) { + tm->tm_mon = -1; + for (i=0; i<12; i++) { + if (RSTRING(s)->len == 3 && + strcasecmp(months[i], RSTRING(v[1])->ptr) == 0) { + tm->tm_mon = i; + break; + } + } + if (tm->tm_mon == -1) { + char c = RSTRING(s)->ptr[0]; + + if ('0' <= c && c <= '9') { + tm->tm_mon = obj2long(s)-1; + } + } + } + else { + tm->tm_mon = obj2long(v[1])-1; + } + } + if (NIL_P(v[2])) { + tm->tm_mday = 1; + } + else { + tm->tm_mday = obj2long(v[2]); + } + tm->tm_hour = NIL_P(v[3])?0:obj2long(v[3]); + tm->tm_min = NIL_P(v[4])?0:obj2long(v[4]); + tm->tm_sec = NIL_P(v[5])?0:obj2long(v[5]); + if (!NIL_P(v[6])) { + /* when argc == 8, v[6] is timezone, but ignored */ + if (argc == 7) { + *usec = obj2long(v[6]); + } + } + + /* value validation */ + if ( + tm->tm_year != year || +#ifndef NEGATIVE_TIME_T + tm->tm_year < 69 || +#endif + tm->tm_mon < 0 || tm->tm_mon > 11 + || tm->tm_mday < 1 || tm->tm_mday > 31 + || tm->tm_hour < 0 || tm->tm_hour > 23 + || tm->tm_min < 0 || tm->tm_min > 59 + || tm->tm_sec < 0 || tm->tm_sec > 60) + rb_raise(rb_eArgError, "argument out of range"); +} + +static VALUE time_gmtime _((VALUE)); +static VALUE time_localtime _((VALUE)); +static VALUE time_get_tm _((VALUE, int)); + +static int +leap_year_p(y) + long y; +{ + return ((y % 4 == 0) && (y % 100 != 0)) || (y % 400 == 0); +} + +#define DIV(n,d) ((n)<0 ? 
NDIV((n),(d)) : (n)/(d)) + +static time_t +timegm_noleapsecond(tm) + struct tm *tm; +{ + static int common_year_yday_offset[] = { + -1, + -1 + 31, + -1 + 31 + 28, + -1 + 31 + 28 + 31, + -1 + 31 + 28 + 31 + 30, + -1 + 31 + 28 + 31 + 30 + 31, + -1 + 31 + 28 + 31 + 30 + 31 + 30, + -1 + 31 + 28 + 31 + 30 + 31 + 30 + 31, + -1 + 31 + 28 + 31 + 30 + 31 + 30 + 31 + 31, + -1 + 31 + 28 + 31 + 30 + 31 + 30 + 31 + 31 + 30, + -1 + 31 + 28 + 31 + 30 + 31 + 30 + 31 + 31 + 30 + 31, + -1 + 31 + 28 + 31 + 30 + 31 + 30 + 31 + 31 + 30 + 31 + 30 + /* 1 2 3 4 5 6 7 8 9 10 11 */ + }; + static int leap_year_yday_offset[] = { + -1, + -1 + 31, + -1 + 31 + 29, + -1 + 31 + 29 + 31, + -1 + 31 + 29 + 31 + 30, + -1 + 31 + 29 + 31 + 30 + 31, + -1 + 31 + 29 + 31 + 30 + 31 + 30, + -1 + 31 + 29 + 31 + 30 + 31 + 30 + 31, + -1 + 31 + 29 + 31 + 30 + 31 + 30 + 31 + 31, + -1 + 31 + 29 + 31 + 30 + 31 + 30 + 31 + 31 + 30, + -1 + 31 + 29 + 31 + 30 + 31 + 30 + 31 + 31 + 30 + 31, + -1 + 31 + 29 + 31 + 30 + 31 + 30 + 31 + 31 + 30 + 31 + 30 + /* 1 2 3 4 5 6 7 8 9 10 11 */ + }; + + long tm_year = tm->tm_year; + int tm_yday = tm->tm_mday; + if (leap_year_p(tm_year + 1900)) + tm_yday += leap_year_yday_offset[tm->tm_mon]; + else + tm_yday += common_year_yday_offset[tm->tm_mon]; + + /* + * `Seconds Since the Epoch' in SUSv3: + * tm_sec + tm_min*60 + tm_hour*3600 + tm_yday*86400 + + * (tm_year-70)*31536000 + ((tm_year-69)/4)*86400 - + * ((tm_year-1)/100)*86400 + ((tm_year+299)/400)*86400 + */ + return tm->tm_sec + tm->tm_min*60 + tm->tm_hour*3600 + + (time_t)(tm_yday + + (tm_year-70)*365 + + DIV(tm_year-69,4) - + DIV(tm_year-1,100) + + DIV(tm_year+299,400))*86400; +} + +static int +tmcmp(a, b) + struct tm *a; + struct tm *b; +{ + if (a->tm_year != b->tm_year) + return a->tm_year < b->tm_year ? -1 : 1; + else if (a->tm_mon != b->tm_mon) + return a->tm_mon < b->tm_mon ? -1 : 1; + else if (a->tm_mday != b->tm_mday) + return a->tm_mday < b->tm_mday ? 
-1 : 1; + else if (a->tm_hour != b->tm_hour) + return a->tm_hour < b->tm_hour ? -1 : 1; + else if (a->tm_min != b->tm_min) + return a->tm_min < b->tm_min ? -1 : 1; + else if (a->tm_sec != b->tm_sec) + return a->tm_sec < b->tm_sec ? -1 : 1; + else + return 0; +} + +static time_t +search_time_t(tptr, utc_p) + struct tm *tptr; + int utc_p; +{ + time_t guess, guess_lo, guess_hi; + struct tm *tm, tm_lo, tm_hi; + int d, have_guess; + int find_dst; + + find_dst = 0 < tptr->tm_isdst; + +#ifdef NEGATIVE_TIME_T + guess_lo = 1L << (8 * sizeof(time_t) - 1); +#else + guess_lo = 0; +#endif + guess_hi = ((time_t)-1) < ((time_t)0) ? + (1UL << (8 * sizeof(time_t) - 1)) - 1 : + ~(time_t)0; + + guess = timegm_noleapsecond(tptr); + tm = (utc_p ? gmtime : localtime)(&guess); + if (tm) { + d = tmcmp(tptr, tm); + if (d == 0) return guess; + if (d < 0) { + guess_hi = guess; + guess -= 24 * 60 * 60; + } + else { + guess_lo = guess; + guess += 24 * 60 * 60; + } + if (guess_lo < guess && guess < guess_hi && + (tm = (utc_p ? gmtime : localtime)(&guess)) != NULL) { + d = tmcmp(tptr, tm); + if (d == 0) return guess; + if (d < 0) + guess_hi = guess; + else + guess_lo = guess; + } + } + + tm = (utc_p ? gmtime : localtime)(&guess_lo); + if (!tm) goto error; + d = tmcmp(tptr, tm); + if (d < 0) goto out_of_range; + if (d == 0) return guess_lo; + tm_lo = *tm; + + tm = (utc_p ? gmtime : localtime)(&guess_hi); + if (!tm) goto error; + d = tmcmp(tptr, tm); + if (d > 0) goto out_of_range; + if (d == 0) return guess_hi; + tm_hi = *tm; + + have_guess = 0; + + while (guess_lo + 1 < guess_hi) { + /* there is a gap between guess_lo and guess_hi. */ + unsigned long range = 0; + if (!have_guess) { + int a, b; + /* + Try precious guess by a linear interpolation at first. 
+ `a' and `b' is a coefficient of guess_lo and guess_hi as: + + guess = (guess_lo * a + guess_hi * b) / (a + b) + + However this causes overflow in most cases, following assignment + is used instead: + + guess = guess_lo / d * a + (guess_lo % d) * a / d + + guess_hi / d * b + (guess_hi % d) * b / d + where d = a + b + + To avoid overflow in this assignment, `d' is restricted to less than + sqrt(2**31). By this restriction and other reasons, the guess is + not accurate and some error is expected. `range' approximates + the maximum error. + + When these parameters are not suitable, i.e. guess is not within + guess_lo and guess_hi, simple guess by binary search is used. + */ + range = 366 * 24 * 60 * 60; + a = (tm_hi.tm_year - tptr->tm_year); + b = (tptr->tm_year - tm_lo.tm_year); + /* 46000 is selected as `some big number less than sqrt(2**31)'. */ + if (a + b <= 46000 / 12) { + range = 31 * 24 * 60 * 60; + a *= 12; + b *= 12; + a += tm_hi.tm_mon - tptr->tm_mon; + b += tptr->tm_mon - tm_lo.tm_mon; + if (a + b <= 46000 / 31) { + range = 24 * 60 * 60; + a *= 31; + b *= 31; + a += tm_hi.tm_mday - tptr->tm_mday; + b += tptr->tm_mday - tm_lo.tm_mday; + if (a + b <= 46000 / 24) { + range = 60 * 60; + a *= 24; + b *= 24; + a += tm_hi.tm_hour - tptr->tm_hour; + b += tptr->tm_hour - tm_lo.tm_hour; + if (a + b <= 46000 / 60) { + range = 60; + a *= 60; + b *= 60; + a += tm_hi.tm_min - tptr->tm_min; + b += tptr->tm_min - tm_lo.tm_min; + if (a + b <= 46000 / 60) { + range = 1; + a *= 60; + b *= 60; + a += tm_hi.tm_sec - tptr->tm_sec; + b += tptr->tm_sec - tm_lo.tm_sec; + } + } + } + } + } + if (a <= 0) a = 1; + if (b <= 0) b = 1; + d = a + b; + /* + Although `/' and `%' may produce unexpected result with negative + argument, it doesn't cause serious problem because there is a + fail safe. 
+ */ + guess = guess_lo / d * a + (guess_lo % d) * a / d + + guess_hi / d * b + (guess_hi % d) * b / d; + have_guess = 1; + } + + if (guess <= guess_lo || guess_hi <= guess) { + /* Precious guess is invalid. try binary search. */ + guess = guess_lo / 2 + guess_hi / 2; + if (guess <= guess_lo) + guess = guess_lo + 1; + else if (guess >= guess_hi) + guess = guess_hi - 1; + range = 0; + } + + tm = (utc_p ? gmtime : localtime)(&guess); + if (!tm) goto error; + have_guess = 0; + + d = tmcmp(tptr, tm); + if (d < 0) { + guess_hi = guess; + tm_hi = *tm; + if (range) { + guess = guess - range; + range = 0; + if (guess_lo < guess && guess < guess_hi) + have_guess = 1; + } + } + else if (d > 0) { + guess_lo = guess; + tm_lo = *tm; + if (range) { + guess = guess + range; + range = 0; + if (guess_lo < guess && guess < guess_hi) + have_guess = 1; + } + } + else { + if (!utc_p) { + /* If localtime is nonmonotonic, another result may exist. */ + time_t guess2; + if (find_dst) { + guess2 = guess - 2 * 60 * 60; + tm = localtime(&guess2); + if (tm) { + if (tptr->tm_hour != (tm->tm_hour + 2) % 24 || + tptr->tm_min != tm->tm_min || + tptr->tm_sec != tm->tm_sec + ) { + guess2 -= (tm->tm_hour - tptr->tm_hour) * 60 * 60 + + (tm->tm_min - tptr->tm_min) * 60 + + (tm->tm_sec - tptr->tm_sec); + if (tptr->tm_mday != tm->tm_mday) + guess2 += 24 * 60 * 60; + if (guess != guess2) { + tm = localtime(&guess2); + if (tmcmp(tptr, tm) == 0) { + if (guess < guess2) + return guess; + else + return guess2; + } + } + } + } + } + else { + guess2 = guess + 2 * 60 * 60; + tm = localtime(&guess2); + if (tm) { + if ((tptr->tm_hour + 2) % 24 != tm->tm_hour || + tptr->tm_min != tm->tm_min || + tptr->tm_sec != tm->tm_sec + ) { + guess2 -= (tm->tm_hour - tptr->tm_hour) * 60 * 60 + + (tm->tm_min - tptr->tm_min) * 60 + + (tm->tm_sec - tptr->tm_sec); + if (tptr->tm_mday != tm->tm_mday) + guess2 -= 24 * 60 * 60; + if (guess != guess2) { + tm = localtime(&guess2); + if (tmcmp(tptr, tm) == 0) { + if (guess < guess2) + 
return guess2; + else + return guess; + } + } + } + } + } + } + return guess; + } + } + /* Given argument has no corresponding time_t. Let's outerpolation. */ + if (tm_lo.tm_year == tptr->tm_year && tm_lo.tm_mon == tptr->tm_mon) { + return guess_lo + + (tptr->tm_mday - tm_lo.tm_mday) * 24 * 60 * 60 + + (tptr->tm_hour - tm_lo.tm_hour) * 60 * 60 + + (tptr->tm_min - tm_lo.tm_min) * 60 + + (tptr->tm_sec - tm_lo.tm_sec); + } + else if (tm_hi.tm_year == tptr->tm_year && tm_hi.tm_mon == tptr->tm_mon) { + return guess_hi + + (tptr->tm_mday - tm_hi.tm_mday) * 24 * 60 * 60 + + (tptr->tm_hour - tm_hi.tm_hour) * 60 * 60 + + (tptr->tm_min - tm_hi.tm_min) * 60 + + (tptr->tm_sec - tm_hi.tm_sec); + } + + out_of_range: + rb_raise(rb_eArgError, "time out of range"); + + error: + rb_raise(rb_eArgError, "gmtime/localtime error"); + return 0; /* not reached */ +} + +static time_t +make_time_t(tptr, utc_p) + struct tm *tptr; + int utc_p; +{ + time_t t; + struct tm *tmp, buf; + buf = *tptr; + if (utc_p) { +#if defined(HAVE_TIMEGM) + if ((t = timegm(&buf)) != -1) + return t; +#ifdef NEGATIVE_TIME_T + if ((tmp = gmtime(&t)) && + tptr->tm_year == tmp->tm_year && + tptr->tm_mon == tmp->tm_mon && + tptr->tm_mday == tmp->tm_mday && + tptr->tm_hour == tmp->tm_hour && + tptr->tm_min == tmp->tm_min && + tptr->tm_sec == tmp->tm_sec + ) + return t; +#endif +#endif + return search_time_t(&buf, utc_p); + } + else { +#if defined(HAVE_MKTIME) + if ((t = mktime(&buf)) != -1) + return t; +#ifdef NEGATIVE_TIME_T + if ((tmp = localtime(&t)) && + tptr->tm_year == tmp->tm_year && + tptr->tm_mon == tmp->tm_mon && + tptr->tm_mday == tmp->tm_mday && + tptr->tm_hour == tmp->tm_hour && + tptr->tm_min == tmp->tm_min && + tptr->tm_sec == tmp->tm_sec + ) + return t; +#endif +#endif + return search_time_t(&buf, utc_p); + } +} + +static VALUE +time_utc_or_local(argc, argv, utc_p, klass) + int argc; + VALUE *argv; + int utc_p; + VALUE klass; +{ + struct tm tm; + VALUE time; + time_t usec; + + time_arg(argc, argv, &tm, 
&usec); + time = time_new_internal(klass, make_time_t(&tm, utc_p), usec); + if (utc_p) return time_gmtime(time); + return time_localtime(time); +} + +/* + * call-seq: + * Time.utc( year [, month, day, hour, min, sec, usec] ) => time + * Time.utc( sec, min, hour, day, month, year, wday, yday, isdst, tz + * ) => time + * Time.gm( year [, month, day, hour, min, sec, usec] ) => time + * Time.gm( sec, min, hour, day, month, year, wday, yday, isdst, tz + * ) => time + * + * Creates a time based on given values, interpreted as UTC (GMT). The + * year must be specified. Other values default to the minimum value + * for that field (and may be <code>nil</code> or omitted). Months may + * be specified by numbers from 1 to 12, or by the three-letter English + * month names. Hours are specified on a 24-hour clock (0..23). Raises + * an <code>ArgumentError</code> if any values are out of range. Will + * also accept ten arguments in the order output by + * <code>Time#to_a</code>. + * + * Time.utc(2000,"jan",1,20,15,1) #=> Sat Jan 01 20:15:01 UTC 2000 + * Time.gm(2000,"jan",1,20,15,1) #=> Sat Jan 01 20:15:01 UTC 2000 + */ +static VALUE +time_s_mkutc(argc, argv, klass) + int argc; + VALUE *argv; + VALUE klass; +{ + return time_utc_or_local(argc, argv, Qtrue, klass); +} + +/* + * call-seq: + * Time.local( year [, month, day, hour, min, sec, usec] ) => time + * Time.local( sec, min, hour, day, month, year, wday, yday, isdst, + * tz ) => time + * Time.mktime( year, month, day, hour, min, sec, usec ) => time + * + * Same as <code>Time::gm</code>, but interprets the values in the + * local time zone. + * + * Time.local(2000,"jan",1,20,15,1) #=> Sat Jan 01 20:15:01 CST 2000 + */ + +static VALUE +time_s_mktime(argc, argv, klass) + int argc; + VALUE *argv; + VALUE klass; +{ + return time_utc_or_local(argc, argv, Qfalse, klass); +} + +/* + * call-seq: + * time.to_i => int + * time.tv_sec => int + * + * Returns the value of <i>time</i> as an integer number of seconds + * since epoch. 
+ * + * t = Time.now + * "%10.5f" % t.to_f #=> "1049896564.17839" + * t.to_i #=> 1049896564 + */ + +static VALUE +time_to_i(time) + VALUE time; +{ + struct time_object *tobj; + + GetTimeval(time, tobj); + return LONG2NUM(tobj->tv.tv_sec); +} + +/* + * call-seq: + * time.to_f => float + * + * Returns the value of <i>time</i> as a floating point number of + * seconds since epoch. + * + * t = Time.now + * "%10.5f" % t.to_f #=> "1049896564.13654" + * t.to_i #=> 1049896564 + */ + +static VALUE +time_to_f(time) + VALUE time; +{ + struct time_object *tobj; + + GetTimeval(time, tobj); + return rb_float_new((double)tobj->tv.tv_sec+(double)tobj->tv.tv_usec/1e6); +} + +/* + * call-seq: + * time.usec => int + * time.tv_usec => int + * + * Returns just the number of microseconds for <i>time</i>. + * + * t = Time.now #=> Wed Apr 09 08:56:04 CDT 2003 + * "%10.6f" % t.to_f #=> "1049896564.259970" + * t.usec #=> 259970 + */ + +static VALUE +time_usec(time) + VALUE time; +{ + struct time_object *tobj; + + GetTimeval(time, tobj); + return LONG2NUM(tobj->tv.tv_usec); +} + +/* + * call-seq: + * time <=> other_time => -1, 0, +1 + * time <=> numeric => -1, 0, +1 + * + * Comparison---Compares <i>time</i> with <i>other_time</i> or with + * <i>numeric</i>, which is the number of seconds (possibly + * fractional) since epoch. 
+ * + * t = Time.now #=> Wed Apr 09 08:56:03 CDT 2003 + * t2 = t + 2592000 #=> Fri May 09 08:56:03 CDT 2003 + * t <=> t2 #=> -1 + * t2 <=> t #=> 1 + * t <=> t #=> 0 + */ + +static VALUE +time_cmp(time1, time2) + VALUE time1, time2; +{ + struct time_object *tobj1, *tobj2; + + GetTimeval(time1, tobj1); + if (TYPE(time2) == T_DATA && RDATA(time2)->dfree == time_free) { + GetTimeval(time2, tobj2); + if (tobj1->tv.tv_sec == tobj2->tv.tv_sec) { + if (tobj1->tv.tv_usec == tobj2->tv.tv_usec) return INT2FIX(0); + if (tobj1->tv.tv_usec > tobj2->tv.tv_usec) return INT2FIX(1); + return INT2FIX(-1); + } + if (tobj1->tv.tv_sec > tobj2->tv.tv_sec) return INT2FIX(1); + return INT2FIX(-1); + } + + return Qnil; +} + +/* + * call-seq: + * time.eql?(other_time) + * + * Return <code>true</code> if <i>time</i> and <i>other_time</i> are + * both <code>Time</code> objects with the same seconds and fractional + * seconds. + */ + +static VALUE +time_eql(time1, time2) + VALUE time1, time2; +{ + struct time_object *tobj1, *tobj2; + + GetTimeval(time1, tobj1); + if (TYPE(time2) == T_DATA && RDATA(time2)->dfree == time_free) { + GetTimeval(time2, tobj2); + if (tobj1->tv.tv_sec == tobj2->tv.tv_sec) { + if (tobj1->tv.tv_usec == tobj2->tv.tv_usec) return Qtrue; + } + } + return Qfalse; +} + +/* + * call-seq: + * time.utc? => true or false + * time.gmt? => true or false + * + * Returns <code>true</code> if <i>time</i> represents a time in UTC + * (GMT). + * + * t = Time.now #=> Wed Apr 09 08:56:04 CDT 2003 + * t.utc? #=> false + * t = Time.gm(2000,"jan",1,20,15,1) #=> Sat Jan 01 20:15:01 UTC 2000 + * t.utc? #=> true + * + * t = Time.now #=> Wed Apr 09 08:56:03 CDT 2003 + * t.gmt? #=> false + * t = Time.gm(2000,1,1,20,15,1) #=> Sat Jan 01 20:15:01 UTC 2000 + * t.gmt? 
#=> true + */ + +static VALUE +time_utc_p(time) + VALUE time; +{ + struct time_object *tobj; + + GetTimeval(time, tobj); + if (tobj->gmt) return Qtrue; + return Qfalse; +} + +/* + * call-seq: + * time.hash => fixnum + * + * Return a hash code for this time object. + */ + +static VALUE +time_hash(time) + VALUE time; +{ + struct time_object *tobj; + long hash; + + GetTimeval(time, tobj); + hash = tobj->tv.tv_sec ^ tobj->tv.tv_usec; + return LONG2FIX(hash); +} + +/* :nodoc: */ +static VALUE +time_init_copy(copy, time) + VALUE copy, time; +{ + struct time_object *tobj, *tcopy; + + if (copy == time) return copy; + time_modify(copy); + if (TYPE(time) != T_DATA || RDATA(time)->dfree != time_free) { + rb_raise(rb_eTypeError, "wrong argument type"); + } + GetTimeval(time, tobj); + GetTimeval(copy, tcopy); + MEMCPY(tcopy, tobj, struct time_object, 1); + + return copy; +} + +static VALUE +time_dup(time) + VALUE time; +{ + VALUE dup = time_s_alloc(rb_cTime); + time_init_copy(dup, time); + return dup; +} + +/* + * call-seq: + * time.localtime => time + * + * Converts <i>time</i> to local time (using the local time zone in + * effect for this process) modifying the receiver. + * + * t = Time.gm(2000, "jan", 1, 20, 15, 1) + * t.gmt? #=> true + * t.localtime #=> Sat Jan 01 14:15:01 CST 2000 + * t.gmt? #=> false + */ + +static VALUE +time_localtime(time) + VALUE time; +{ + struct time_object *tobj; + struct tm *tm_tmp; + time_t t; + + GetTimeval(time, tobj); + if (!tobj->gmt) { + if (tobj->tm_got) + return time; + } + else { + time_modify(time); + } + t = tobj->tv.tv_sec; + tm_tmp = localtime(&t); + if (!tm_tmp) + rb_raise(rb_eArgError, "localtime error"); + tobj->tm = *tm_tmp; + tobj->tm_got = 1; + tobj->gmt = 0; + return time; +} + +/* + * call-seq: + * time.gmtime => time + * time.utc => time + * + * Converts <i>time</i> to UTC (GMT), modifying the receiver. + * + * t = Time.now #=> Wed Apr 09 08:56:03 CDT 2003 + * t.gmt? 
#=> false + * t.gmtime #=> Wed Apr 09 13:56:03 UTC 2003 + * t.gmt? #=> true + * + * t = Time.now #=> Wed Apr 09 08:56:04 CDT 2003 + * t.utc? #=> false + * t.utc #=> Wed Apr 09 13:56:04 UTC 2003 + * t.utc? #=> true + */ + +static VALUE +time_gmtime(time) + VALUE time; +{ + struct time_object *tobj; + struct tm *tm_tmp; + time_t t; + + GetTimeval(time, tobj); + if (tobj->gmt) { + if (tobj->tm_got) + return time; + } + else { + time_modify(time); + } + t = tobj->tv.tv_sec; + tm_tmp = gmtime(&t); + if (!tm_tmp) + rb_raise(rb_eArgError, "gmtime error"); + tobj->tm = *tm_tmp; + tobj->tm_got = 1; + tobj->gmt = 1; + return time; +} + +/* + * call-seq: + * time.getlocal => new_time + * + * Returns a new <code>new_time</code> object representing <i>time</i> in + * local time (using the local time zone in effect for this process). + * + * t = Time.gm(2000,1,1,20,15,1) #=> Sat Jan 01 20:15:01 UTC 2000 + * t.gmt? #=> true + * l = t.getlocal #=> Sat Jan 01 14:15:01 CST 2000 + * l.gmt? #=> false + * t == l #=> true + */ + +static VALUE +time_getlocaltime(time) + VALUE time; +{ + return time_localtime(time_dup(time)); +} + +/* + * call-seq: + * time.getgm => new_time + * time.getutc => new_time + * + * Returns a new <code>new_time</code> object representing <i>time</i> in + * UTC. + * + * t = Time.local(2000,1,1,20,15,1) #=> Sat Jan 01 20:15:01 CST 2000 + * t.gmt? #=> false + * y = t.getgm #=> Sun Jan 02 02:15:01 UTC 2000 + * y.gmt? #=> true + * t == y #=> true + */ + +static VALUE +time_getgmtime(time) + VALUE time; +{ + return time_gmtime(time_dup(time)); +} + +static VALUE +time_get_tm(time, gmt) + VALUE time; + int gmt; +{ + if (gmt) return time_gmtime(time); + return time_localtime(time); +} + +/* + * call-seq: + * time.asctime => string + * time.ctime => string + * + * Returns a canonical string representation of <i>time</i>. 
+ * + * Time.now.asctime #=> "Wed Apr 9 08:56:03 2003" + */ + +static VALUE +time_asctime(time) + VALUE time; +{ + struct time_object *tobj; + char *s; + + GetTimeval(time, tobj); + if (tobj->tm_got == 0) { + time_get_tm(time, tobj->gmt); + } + s = asctime(&tobj->tm); + if (s[24] == '\n') s[24] = '\0'; + + return rb_str_new2(s); +} + +/* + * call-seq: + * time.inspect => string + * time.to_s => string + * + * Returns a string representing <i>time</i>. Equivalent to calling + * <code>Time#strftime</code> with a format string of ``<code>%a</code> + * <code>%b</code> <code>%d</code> <code>%H:%M:%S</code> + * <code>%Z</code> <code>%Y</code>''. + * + * Time.now.to_s #=> "Wed Apr 09 08:56:04 CDT 2003" + */ + +static VALUE +time_to_s(time) + VALUE time; +{ + struct time_object *tobj; + char buf[128]; + int len; + + GetTimeval(time, tobj); + if (tobj->tm_got == 0) { + time_get_tm(time, tobj->gmt); + } + if (tobj->gmt == 1) { + len = strftime(buf, 128, "%a %b %d %H:%M:%S UTC %Y", &tobj->tm); + } + else { + len = strftime(buf, 128, "%a %b %d %H:%M:%S %Z %Y", &tobj->tm); + } + return rb_str_new(buf, len); +} + +#if SIZEOF_TIME_T == SIZEOF_LONG +typedef unsigned long unsigned_time_t; +#elif SIZEOF_TIME_T == SIZEOF_INT +typedef unsigned int unsigned_time_t; +#elif SIZEOF_TIME_T == SIZEOF_LONG_LONG +typedef unsigned long long unsigned_time_t; +#else +# error cannot find integer type which size is same as time_t. +#endif + +static VALUE +time_add(tobj, offset, sign) + struct time_object *tobj; + VALUE offset; + int sign; +{ + double v = NUM2DBL(offset); + double f, d; + unsigned_time_t sec_off; + time_t usec_off, sec, usec; + VALUE result; + + if (v < 0) { + v = -v; + sign = -sign; + } + d = modf(v, &f); + sec_off = (unsigned_time_t)f; + if (f != (double)sec_off) + rb_raise(rb_eRangeError, "time %s %f out of Time range", + sign < 0 ? 
"-" : "+", v); + usec_off = (time_t)(d*1e6); + + if (sign < 0) { + sec = tobj->tv.tv_sec - sec_off; + usec = tobj->tv.tv_usec - usec_off; + if (sec > tobj->tv.tv_sec) + rb_raise(rb_eRangeError, "time - %f out of Time range", v); + } + else { + sec = tobj->tv.tv_sec + sec_off; + usec = tobj->tv.tv_usec + usec_off; + if (sec < tobj->tv.tv_sec) + rb_raise(rb_eRangeError, "time + %f out of Time range", v); + } + result = rb_time_new(sec, usec); + if (tobj->gmt) { + GetTimeval(result, tobj); + tobj->gmt = 1; + } + return result; +} + +/* + * call-seq: + * time + numeric => time + * + * Addition---Adds some number of seconds (possibly fractional) to + * <i>time</i> and returns that value as a new time. + * + * t = Time.now #=> Wed Apr 09 08:56:03 CDT 2003 + * t + (60 * 60 * 24) #=> Thu Apr 10 08:56:03 CDT 2003 + */ + +static VALUE +time_plus(time1, time2) + VALUE time1, time2; +{ + struct time_object *tobj; + GetTimeval(time1, tobj); + + if (TYPE(time2) == T_DATA && RDATA(time2)->dfree == time_free) { + rb_raise(rb_eTypeError, "time + time?"); + } + return time_add(tobj, time2, 1); +} + +/* + * call-seq: + * time - other_time => float + * time - numeric => time + * + * Difference---Returns a new time that represents the difference + * between two times, or subtracts the given number of seconds in + * <i>numeric</i> from <i>time</i>. 
+ * + * t = Time.now #=> Wed Apr 09 08:56:03 CDT 2003 + * t2 = t + 2592000 #=> Fri May 09 08:56:03 CDT 2003 + * t2 - t #=> 2592000.0 + * t2 - 2592000 #=> Wed Apr 09 08:56:03 CDT 2003 + */ + +static VALUE +time_minus(time1, time2) + VALUE time1, time2; +{ + struct time_object *tobj; + + GetTimeval(time1, tobj); + if (TYPE(time2) == T_DATA && RDATA(time2)->dfree == time_free) { + struct time_object *tobj2; + double f; + + GetTimeval(time2, tobj2); + f = (double)tobj->tv.tv_sec - (double)tobj2->tv.tv_sec; + f += ((double)tobj->tv.tv_usec - (double)tobj2->tv.tv_usec)*1e-6; + /* XXX: should check float overflow on 64bit time_t platforms */ + + return rb_float_new(f); + } + return time_add(tobj, time2, -1); +} + +/* + * call-seq: + * time.succ => new_time + * + * Return a new time object, one second later than <code>time</code>. + */ + +static VALUE +time_succ(time) + VALUE time; +{ + struct time_object *tobj; + + GetTimeval(time, tobj); + return rb_time_new(tobj->tv.tv_sec + 1, tobj->tv.tv_usec); +} + +/* + * call-seq: + * time.sec => fixnum + * + * Returns the second of the minute (0..60)<em>[Yes, seconds really can + * range from zero to 60. This allows the system to inject leap seconds + * every now and then to correct for the fact that years are not really + * a convenient number of hours long.]</em> for <i>time</i>. + * + * t = Time.now #=> Wed Apr 09 08:56:04 CDT 2003 + * t.sec #=> 4 + */ + +static VALUE +time_sec(time) + VALUE time; +{ + struct time_object *tobj; + + GetTimeval(time, tobj); + if (tobj->tm_got == 0) { + time_get_tm(time, tobj->gmt); + } + return INT2FIX(tobj->tm.tm_sec); +} + +/* + * call-seq: + * time.min => fixnum + * + * Returns the minute of the hour (0..59) for <i>time</i>. 
+ * + * t = Time.now #=> Wed Apr 09 08:56:03 CDT 2003 + * t.min #=> 56 + */ + +static VALUE +time_min(time) + VALUE time; +{ + struct time_object *tobj; + + GetTimeval(time, tobj); + if (tobj->tm_got == 0) { + time_get_tm(time, tobj->gmt); + } + return INT2FIX(tobj->tm.tm_min); +} + +/* + * call-seq: + * time.hour => fixnum + * + * Returns the hour of the day (0..23) for <i>time</i>. + * + * t = Time.now #=> Wed Apr 09 08:56:03 CDT 2003 + * t.hour #=> 8 + */ + +static VALUE +time_hour(time) + VALUE time; +{ + struct time_object *tobj; + + GetTimeval(time, tobj); + if (tobj->tm_got == 0) { + time_get_tm(time, tobj->gmt); + } + return INT2FIX(tobj->tm.tm_hour); +} + +/* + * call-seq: + * time.day => fixnum + * time.mday => fixnum + * + * Returns the day of the month (1..n) for <i>time</i>. + * + * t = Time.now #=> Wed Apr 09 08:56:03 CDT 2003 + * t.day #=> 9 + * t.mday #=> 9 + */ + +static VALUE +time_mday(time) + VALUE time; +{ + struct time_object *tobj; + + GetTimeval(time, tobj); + if (tobj->tm_got == 0) { + time_get_tm(time, tobj->gmt); + } + return INT2FIX(tobj->tm.tm_mday); +} + +/* + * call-seq: + * time.mon => fixnum + * time.month => fixnum + * + * Returns the month of the year (1..12) for <i>time</i>. + * + * t = Time.now #=> Wed Apr 09 08:56:03 CDT 2003 + * t.mon #=> 4 + * t.month #=> 4 + */ + +static VALUE +time_mon(time) + VALUE time; +{ + struct time_object *tobj; + + GetTimeval(time, tobj); + if (tobj->tm_got == 0) { + time_get_tm(time, tobj->gmt); + } + return INT2FIX(tobj->tm.tm_mon+1); +} + +/* + * call-seq: + * time.year => fixnum + * + * Returns the year for <i>time</i> (including the century). 
+ * + * t = Time.now #=> Wed Apr 09 08:56:04 CDT 2003 + * t.year #=> 2003 + */ + +static VALUE +time_year(time) + VALUE time; +{ + struct time_object *tobj; + + GetTimeval(time, tobj); + if (tobj->tm_got == 0) { + time_get_tm(time, tobj->gmt); + } + return LONG2NUM((long)tobj->tm.tm_year+1900); +} + +/* + * call-seq: + * time.wday => fixnum + * + * Returns an integer representing the day of the week, 0..6, with + * Sunday == 0. + * + * t = Time.now #=> Wed Apr 09 08:56:04 CDT 2003 + * t.wday #=> 3 + */ + +static VALUE +time_wday(time) + VALUE time; +{ + struct time_object *tobj; + + GetTimeval(time, tobj); + if (tobj->tm_got == 0) { + time_get_tm(time, tobj->gmt); + } + return INT2FIX(tobj->tm.tm_wday); +} + +/* + * call-seq: + * time.yday => fixnum + * + * Returns an integer representing the day of the year, 1..366. + * + * t = Time.now #=> Wed Apr 09 08:56:04 CDT 2003 + * t.yday #=> 99 + */ + +static VALUE +time_yday(time) + VALUE time; +{ + struct time_object *tobj; + + GetTimeval(time, tobj); + if (tobj->tm_got == 0) { + time_get_tm(time, tobj->gmt); + } + return INT2FIX(tobj->tm.tm_yday+1); +} + +/* + * call-seq: + * time.isdst => true or false + * time.dst? => true or false + * + * Returns <code>true</code> if <i>time</i> occurs during Daylight + * Saving Time in its time zone. + * + * Time.local(2000, 7, 1).isdst #=> true + * Time.local(2000, 1, 1).isdst #=> false + * Time.local(2000, 7, 1).dst? #=> true + * Time.local(2000, 1, 1).dst? #=> false + */ + +static VALUE +time_isdst(time) + VALUE time; +{ + struct time_object *tobj; + + GetTimeval(time, tobj); + if (tobj->tm_got == 0) { + time_get_tm(time, tobj->gmt); + } + return tobj->tm.tm_isdst?Qtrue:Qfalse; +} + +/* + * call-seq: + * time.zone => string + * + * Returns the name of the time zone used for <i>time</i>. As of Ruby + * 1.8, returns ``UTC'' rather than ``GMT'' for UTC times. 
+ * + * t = Time.gm(2000, "jan", 1, 20, 15, 1) + * t.zone #=> "UTC" + * t = Time.local(2000, "jan", 1, 20, 15, 1) + * t.zone #=> "CST" + */ + +static VALUE +time_zone(time) + VALUE time; +{ + struct time_object *tobj; +#if !defined(HAVE_TM_ZONE) && (!defined(HAVE_TZNAME) || !defined(HAVE_DAYLIGHT)) + char buf[64]; + int len; +#endif + + GetTimeval(time, tobj); + if (tobj->tm_got == 0) { + time_get_tm(time, tobj->gmt); + } + + if (tobj->gmt == 1) { + return rb_str_new2("UTC"); + } +#if defined(HAVE_TM_ZONE) + return rb_str_new2(tobj->tm.tm_zone); +#elif defined(HAVE_TZNAME) && defined(HAVE_DAYLIGHT) + return rb_str_new2(tzname[daylight && tobj->tm.tm_isdst]); +#else + len = strftime(buf, 64, "%Z", &tobj->tm); + return rb_str_new(buf, len); +#endif +} + +/* + * call-seq: + * time.gmt_offset => fixnum + * time.gmtoff => fixnum + * time.utc_offset => fixnum + * + * Returns the offset in seconds between the timezone of <i>time</i> + * and UTC. + * + * t = Time.gm(2000,1,1,20,15,1) #=> Sat Jan 01 20:15:01 UTC 2000 + * t.gmt_offset #=> 0 + * l = t.getlocal #=> Sat Jan 01 14:15:01 CST 2000 + * l.gmt_offset #=> -21600 + */ + +static VALUE +time_utc_offset(time) + VALUE time; +{ + struct time_object *tobj; + + GetTimeval(time, tobj); + if (tobj->tm_got == 0) { + time_get_tm(time, tobj->gmt); + } + + if (tobj->gmt == 1) { + return INT2FIX(0); + } + else { +#if defined(HAVE_STRUCT_TM_TM_GMTOFF) + return INT2NUM(tobj->tm.tm_gmtoff); +#else + struct tm *u, *l; + time_t t; + long off; + l = &tobj->tm; + t = tobj->tv.tv_sec; + u = gmtime(&t); + if (!u) + rb_raise(rb_eArgError, "gmtime error"); + if (l->tm_year != u->tm_year) + off = l->tm_year < u->tm_year ? -1 : 1; + else if (l->tm_mon != u->tm_mon) + off = l->tm_mon < u->tm_mon ? -1 : 1; + else if (l->tm_mday != u->tm_mday) + off = l->tm_mday < u->tm_mday ? 
-1 : 1; + else + off = 0; + off = off * 24 + l->tm_hour - u->tm_hour; + off = off * 60 + l->tm_min - u->tm_min; + off = off * 60 + l->tm_sec - u->tm_sec; + return LONG2FIX(off); +#endif + } +} + +/* + * call-seq: + * time.to_a => array + * + * Returns a ten-element <i>array</i> of values for <i>time</i>: + * {<code>[ sec, min, hour, day, month, year, wday, yday, isdst, zone + * ]</code>}. See the individual methods for an explanation of the + * valid ranges of each value. The ten elements can be passed directly + * to <code>Time::utc</code> or <code>Time::local</code> to create a + * new <code>Time</code>. + * + * now = Time.now #=> Wed Apr 09 08:56:04 CDT 2003 + * t = now.to_a #=> [4, 56, 8, 9, 4, 2003, 3, 99, true, "CDT"] + */ + +static VALUE +time_to_a(time) + VALUE time; +{ + struct time_object *tobj; + + GetTimeval(time, tobj); + if (tobj->tm_got == 0) { + time_get_tm(time, tobj->gmt); + } + return rb_ary_new3(10, + INT2FIX(tobj->tm.tm_sec), + INT2FIX(tobj->tm.tm_min), + INT2FIX(tobj->tm.tm_hour), + INT2FIX(tobj->tm.tm_mday), + INT2FIX(tobj->tm.tm_mon+1), + LONG2NUM((long)tobj->tm.tm_year+1900), + INT2FIX(tobj->tm.tm_wday), + INT2FIX(tobj->tm.tm_yday+1), + tobj->tm.tm_isdst?Qtrue:Qfalse, + time_zone(time)); +} + +#define SMALLBUF 100 +static int +rb_strftime(buf, format, time) + char ** volatile buf; + char * volatile format; + struct tm * volatile time; +{ + volatile int size; + int len, flen; + + (*buf)[0] = '\0'; + flen = strlen(format); + if (flen == 0) { + return 0; + } + len = strftime(*buf, SMALLBUF, format, time); + if (len != 0 || **buf == '\0') return len; + for (size=1024; ; size*=2) { + *buf = xmalloc(size); + (*buf)[0] = '\0'; + len = strftime(*buf, size, format, time); + /* + * buflen can be zero EITHER because there's not enough + * room in the string, or because the control command + * goes to the empty string. 
Make a reasonable guess that + * if the buffer is 1024 times bigger than the length of the + * format string, it's not failing for lack of room. + */ + if (len > 0 || size >= 1024 * flen) return len; + free(*buf); + } + /* not reached */ +} + +/* + * call-seq: + * time.strftime( string ) => string + * + * Formats <i>time</i> according to the directives in the given format + * string. Any text not listed as a directive will be passed through + * to the output string. + * + * Format meaning: + * %a - The abbreviated weekday name (``Sun'') + * %A - The full weekday name (``Sunday'') + * %b - The abbreviated month name (``Jan'') + * %B - The full month name (``January'') + * %c - The preferred local date and time representation + * %d - Day of the month (01..31) + * %H - Hour of the day, 24-hour clock (00..23) + * %I - Hour of the day, 12-hour clock (01..12) + * %j - Day of the year (001..366) + * %m - Month of the year (01..12) + * %M - Minute of the hour (00..59) + * %p - Meridian indicator (``AM'' or ``PM'') + * %S - Second of the minute (00..60) + * %U - Week number of the current year, + * starting with the first Sunday as the first + * day of the first week (00..53) + * %W - Week number of the current year, + * starting with the first Monday as the first + * day of the first week (00..53) + * %w - Day of the week (Sunday is 0, 0..6) + * %x - Preferred representation for the date alone, no time + * %X - Preferred representation for the time alone, no date + * %y - Year without a century (00..99) + * %Y - Year with century + * %Z - Time zone name + * %% - Literal ``%'' character + * + * t = Time.now + * t.strftime("Printed on %m/%d/%Y") #=> "Printed on 04/09/2003" + * t.strftime("at %I:%M%p") #=> "at 08:56AM" + */ + +static VALUE +time_strftime(time, format) + VALUE time, format; +{ + struct time_object *tobj; + char buffer[SMALLBUF]; + char *fmt, *buf = buffer; + long len; + VALUE str; + + GetTimeval(time, tobj); + if (tobj->tm_got == 0) { + time_get_tm(time, 
tobj->gmt); + } + StringValue(format); + format = rb_str_new4(format); + fmt = RSTRING(format)->ptr; + len = RSTRING(format)->len; + if (len == 0) { + rb_warning("strftime called with empty format string"); + } + else if (strlen(fmt) < len) { + /* Ruby string may contain \0's. */ + char *p = fmt, *pe = fmt + len; + + str = rb_str_new(0, 0); + while (p < pe) { + len = rb_strftime(&buf, p, &tobj->tm); + rb_str_cat(str, buf, len); + p += strlen(p) + 1; + if (p <= pe) + rb_str_cat(str, "\0", 1); + if (buf != buffer) { + free(buf); + buf = buffer; + } + } + return str; + } + else { + len = rb_strftime(&buf, RSTRING(format)->ptr, &tobj->tm); + } + str = rb_str_new(buf, len); + if (buf != buffer) free(buf); + return str; +} + +/* + * call-seq: + * Time.times => struct_tms + * + * Deprecated in favor of <code>Process::times</code> + */ + +static VALUE +time_s_times(obj) + VALUE obj; +{ + rb_warn("obsolete method Time::times; use Process::times"); + return rb_proc_times(obj); +} + +/* + * undocumented + */ + +static VALUE +time_mdump(time) + VALUE time; +{ + struct time_object *tobj; + struct tm *tm; + unsigned long p, s; + char buf[8]; + time_t t; + int i; + + GetTimeval(time, tobj); + + t = tobj->tv.tv_sec; + tm = gmtime(&t); + + if ((tm->tm_year & 0xffff) != tm->tm_year) + rb_raise(rb_eArgError, "year too big to marshal"); + + p = 0x1 << 31 | /* 1 */ + tobj->gmt << 30 | /* 1 */ + tm->tm_year << 14 | /* 16 */ + tm->tm_mon << 10 | /* 4 */ + tm->tm_mday << 5 | /* 5 */ + tm->tm_hour; /* 5 */ + s = tm->tm_min << 26 | /* 6 */ + tm->tm_sec << 20 | /* 6 */ + tobj->tv.tv_usec; /* 20 */ + + for (i=0; i<4; i++) { + buf[i] = p & 0xff; + p = RSHIFT(p, 8); + } + for (i=4; i<8; i++) { + buf[i] = s & 0xff; + s = RSHIFT(s, 8); + } + + return rb_str_new(buf, 8); +} + +/* + * call-seq: + * time._dump => string + * + * Dump _time_ for marshaling. 
+ */ + +static VALUE +time_dump(argc, argv, time) + int argc; + VALUE *argv; + VALUE time; +{ + VALUE str; + + rb_scan_args(argc, argv, "01", 0); + str = time_mdump(time); + rb_copy_generic_ivar(str, time); + + return str; +} + +/* + * undocumented + */ + +static VALUE +time_mload(time, str) + VALUE time, str; +{ + struct time_object *tobj; + unsigned long p, s; + time_t sec, usec; + unsigned char *buf; + struct tm tm; + int i, gmt; + + time_modify(time); + StringValue(str); + buf = (unsigned char *)RSTRING(str)->ptr; + if (RSTRING(str)->len != 8) { + rb_raise(rb_eTypeError, "marshaled time format differ"); + } + + p = s = 0; + for (i=0; i<4; i++) { + p |= buf[i]<<(8*i); + } + for (i=4; i<8; i++) { + s |= buf[i]<<(8*(i-4)); + } + + if ((p & (1<<31)) == 0) { + sec = p; + usec = s; + } + else { + p &= ~(1<<31); + gmt = (p >> 30) & 0x1; + tm.tm_year = (p >> 14) & 0xffff; + tm.tm_mon = (p >> 10) & 0xf; + tm.tm_mday = (p >> 5) & 0x1f; + tm.tm_hour = p & 0x1f; + tm.tm_min = (s >> 26) & 0x3f; + tm.tm_sec = (s >> 20) & 0x3f; + tm.tm_isdst = 0; + + sec = make_time_t(&tm, Qtrue); + usec = (time_t)(s & 0xfffff); + } + time_overflow_p(&sec, &usec); + + GetTimeval(time, tobj); + tobj->tm_got = 0; + tobj->gmt = gmt; + tobj->tv.tv_sec = sec; + tobj->tv.tv_usec = usec; + return time; +} + +/* + * call-seq: + * Time._load(string) => time + * + * Unmarshal a dumped +Time+ object. + */ + +static VALUE +time_load(klass, str) + VALUE klass, str; +{ + VALUE time = time_s_alloc(klass); + + rb_copy_generic_ivar(time, str); + time_mload(time, str); + return time; +} + +/* + * <code>Time</code> is an abstraction of dates and times. Time is + * stored internally as the number of seconds and microseconds since + * the <em>epoch</em>, January 1, 1970 00:00 UTC. On some operating + * systems, this offset is allowed to be negative. Also see the + * library modules <code>Date</code> and <code>ParseDate</code>. 
The
 *  <code>Time</code> class treats GMT (Greenwich Mean Time) and UTC
 *  (Coordinated Universal Time)<em>[Yes, UTC really does stand for
 *  Coordinated Universal Time. There was a committee involved.]</em>
 *  as equivalent.  GMT is the older way of referring to these
 *  baseline times but persists in the names of calls on Posix
 *  systems.
 *
 *  All times are stored with some number of microseconds. Be aware of
 *  this fact when comparing times with each other---times that are
 *  apparently equal when displayed may be different when compared.
 */

void
Init_Time()
{
    /* Define the Time class and mix in Comparable. */
    rb_cTime = rb_define_class("Time", rb_cObject);
    rb_include_module(rb_cTime, rb_mComparable);

    /* Allocation and class-level constructors. */
    rb_define_alloc_func(rb_cTime, time_s_alloc);
    rb_define_singleton_method(rb_cTime, "now", rb_class_new_instance, -1);
    rb_define_singleton_method(rb_cTime, "at", time_s_at, -1);
    rb_define_singleton_method(rb_cTime, "utc", time_s_mkutc, -1);
    rb_define_singleton_method(rb_cTime, "gm", time_s_mkutc, -1);
    rb_define_singleton_method(rb_cTime, "local", time_s_mktime, -1);
    rb_define_singleton_method(rb_cTime, "mktime", time_s_mktime, -1);

    rb_define_singleton_method(rb_cTime, "times", time_s_times, 0);

    /* Conversion, comparison, hashing and initialization. */
    rb_define_method(rb_cTime, "to_i", time_to_i, 0);
    rb_define_method(rb_cTime, "to_f", time_to_f, 0);
    rb_define_method(rb_cTime, "<=>", time_cmp, 1);
    rb_define_method(rb_cTime, "eql?", time_eql, 1);
    rb_define_method(rb_cTime, "hash", time_hash, 0);
    rb_define_method(rb_cTime, "initialize", time_init, 0);
    rb_define_method(rb_cTime, "initialize_copy", time_init_copy, 1);

    /* Local/UTC conversion: in place, and the get* variants on a copy. */
    rb_define_method(rb_cTime, "localtime", time_localtime, 0);
    rb_define_method(rb_cTime, "gmtime", time_gmtime, 0);
    rb_define_method(rb_cTime, "utc", time_gmtime, 0);
    rb_define_method(rb_cTime, "getlocal", time_getlocaltime, 0);
    rb_define_method(rb_cTime, "getgm", time_getgmtime, 0);
    rb_define_method(rb_cTime, "getutc", time_getgmtime, 0);

    /* String and array representations. */
    rb_define_method(rb_cTime, "ctime", time_asctime, 0);
    rb_define_method(rb_cTime, "asctime", time_asctime, 0);
    rb_define_method(rb_cTime, "to_s", time_to_s, 0);
    rb_define_method(rb_cTime, "inspect", time_to_s, 0);
    rb_define_method(rb_cTime, "to_a", time_to_a, 0);

    /* Arithmetic. */
    rb_define_method(rb_cTime, "+", time_plus, 1);
    rb_define_method(rb_cTime, "-", time_minus, 1);

    /* Broken-down time accessors; several names share one implementation. */
    rb_define_method(rb_cTime, "succ", time_succ, 0);
    rb_define_method(rb_cTime, "sec", time_sec, 0);
    rb_define_method(rb_cTime, "min", time_min, 0);
    rb_define_method(rb_cTime, "hour", time_hour, 0);
    rb_define_method(rb_cTime, "mday", time_mday, 0);
    rb_define_method(rb_cTime, "day", time_mday, 0);
    rb_define_method(rb_cTime, "mon", time_mon, 0);
    rb_define_method(rb_cTime, "month", time_mon, 0);
    rb_define_method(rb_cTime, "year", time_year, 0);
    rb_define_method(rb_cTime, "wday", time_wday, 0);
    rb_define_method(rb_cTime, "yday", time_yday, 0);
    rb_define_method(rb_cTime, "isdst", time_isdst, 0);
    rb_define_method(rb_cTime, "dst?", time_isdst, 0);
    rb_define_method(rb_cTime, "zone", time_zone, 0);
    rb_define_method(rb_cTime, "gmtoff", time_utc_offset, 0);
    rb_define_method(rb_cTime, "gmt_offset", time_utc_offset, 0);
    rb_define_method(rb_cTime, "utc_offset", time_utc_offset, 0);

    rb_define_method(rb_cTime, "utc?", time_utc_p, 0);
    rb_define_method(rb_cTime, "gmt?", time_utc_p, 0);

    /* Raw timeval components; tv_sec is served by the to_i implementation. */
    rb_define_method(rb_cTime, "tv_sec", time_to_i, 0);
    rb_define_method(rb_cTime, "tv_usec", time_usec, 0);
    rb_define_method(rb_cTime, "usec", time_usec, 0);

    rb_define_method(rb_cTime, "strftime", time_strftime, 1);

    /* methods for marshaling */
    rb_define_method(rb_cTime, "_dump", time_dump, -1);
    rb_define_singleton_method(rb_cTime, "_load", time_load, 1);
#if 0
    /* Time will support marshal_dump and marshal_load in the future (1.9 maybe) */
    rb_define_method(rb_cTime, "marshal_dump", time_mdump, 0);
    rb_define_method(rb_cTime, "marshal_load", time_mload, 1);
#endif
}
+/********************************************************************** + utf8.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regenc.h" + +#define USE_INVALID_CODE_SCHEME + +#ifdef USE_INVALID_CODE_SCHEME +/* virtual codepoint values for invalid encoding byte 0xfe and 0xff */ +#define INVALID_CODE_FE 0xfffffffe +#define INVALID_CODE_FF 0xffffffff +#define VALID_CODE_LIMIT 0x7fffffff +#endif + +#define utf8_islead(c) ((UChar )((c) & 0xc0) != 0x80) + +static int EncLen_UTF8[] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1 +}; + +static int +utf8_mbc_enc_len(const UChar* p) +{ + return EncLen_UTF8[*p]; +} + +static OnigCodePoint +utf8_mbc_to_code(const UChar* p, const UChar* end) +{ + int c, len; + OnigCodePoint n; + + len = enc_len(ONIG_ENCODING_UTF8, p); + c = *p++; + if (len > 1) { + len--; + n = c & ((1 << (6 - len)) - 1); + while (len--) { + c = *p++; + n = (n << 6) | (c & ((1 << 6) - 1)); + } + return n; + } + else { +#ifdef USE_INVALID_CODE_SCHEME + if (c > 0xfd) { + return ((c == 0xfe) ? 
INVALID_CODE_FE : INVALID_CODE_FF); + } +#endif + return (OnigCodePoint )c; + } +} + +static int +utf8_code_to_mbclen(OnigCodePoint code) +{ + if ((code & 0xffffff80) == 0) return 1; + else if ((code & 0xfffff800) == 0) { + if (code <= 0xff && code >= 0xfe) + return 1; + return 2; + } + else if ((code & 0xffff0000) == 0) return 3; + else if ((code & 0xffe00000) == 0) return 4; + else if ((code & 0xfc000000) == 0) return 5; + else if ((code & 0x80000000) == 0) return 6; +#ifdef USE_INVALID_CODE_SCHEME + else if (code == INVALID_CODE_FE) return 1; + else if (code == INVALID_CODE_FF) return 1; +#endif + else + return ONIGENCERR_TOO_BIG_WIDE_CHAR_VALUE; +} + +#if 0 +static int +utf8_code_to_mbc_first(OnigCodePoint code) +{ + if ((code & 0xffffff80) == 0) + return code; + else { + if ((code & 0xfffff800) == 0) + return ((code>>6)& 0x1f) | 0xc0; + else if ((code & 0xffff0000) == 0) + return ((code>>12) & 0x0f) | 0xe0; + else if ((code & 0xffe00000) == 0) + return ((code>>18) & 0x07) | 0xf0; + else if ((code & 0xfc000000) == 0) + return ((code>>24) & 0x03) | 0xf8; + else if ((code & 0x80000000) == 0) + return ((code>>30) & 0x01) | 0xfc; + else { + return ONIGENCERR_TOO_BIG_WIDE_CHAR_VALUE; + } + } +} +#endif + +static int +utf8_code_to_mbc(OnigCodePoint code, UChar *buf) +{ +#define UTF8_TRAILS(code, shift) (UChar )((((code) >> (shift)) & 0x3f) | 0x80) +#define UTF8_TRAIL0(code) (UChar )(((code) & 0x3f) | 0x80) + + if ((code & 0xffffff80) == 0) { + *buf = (UChar )code; + return 1; + } + else { + UChar *p = buf; + + if ((code & 0xfffff800) == 0) { + *p++ = (UChar )(((code>>6)& 0x1f) | 0xc0); + } + else if ((code & 0xffff0000) == 0) { + *p++ = (UChar )(((code>>12) & 0x0f) | 0xe0); + *p++ = UTF8_TRAILS(code, 6); + } + else if ((code & 0xffe00000) == 0) { + *p++ = (UChar )(((code>>18) & 0x07) | 0xf0); + *p++ = UTF8_TRAILS(code, 12); + *p++ = UTF8_TRAILS(code, 6); + } + else if ((code & 0xfc000000) == 0) { + *p++ = (UChar )(((code>>24) & 0x03) | 0xf8); + *p++ = 
UTF8_TRAILS(code, 18); + *p++ = UTF8_TRAILS(code, 12); + *p++ = UTF8_TRAILS(code, 6); + } + else if ((code & 0x80000000) == 0) { + *p++ = (UChar )(((code>>30) & 0x01) | 0xfc); + *p++ = UTF8_TRAILS(code, 24); + *p++ = UTF8_TRAILS(code, 18); + *p++ = UTF8_TRAILS(code, 12); + *p++ = UTF8_TRAILS(code, 6); + } +#ifdef USE_INVALID_CODE_SCHEME + else if (code == INVALID_CODE_FE) { + *p = 0xfe; + return 1; + } + else if (code == INVALID_CODE_FF) { + *p = 0xff; + return 1; + } +#endif + else { + return ONIGENCERR_TOO_BIG_WIDE_CHAR_VALUE; + } + + *p++ = UTF8_TRAIL0(code); + return p - buf; + } +} + +static int +utf8_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, UChar* lower) +{ + const UChar* p = *pp; + + if (ONIGENC_IS_MBC_ASCII(p)) { + if (end > p + 1 && + (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 && + ((*p == 's' && *(p+1) == 's') || + ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && + (*p == 'S' && *(p+1) == 'S')))) { + *lower++ = '\303'; + *lower = '\237'; + (*pp) += 2; + return 2; + } + + if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) { + *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); + } + else { + *lower = *p; + } + (*pp)++; + return 1; /* return byte length of converted char to lower */ + } + else { + int len; + + if (*p == 195) { /* 195 == '\303' */ + int c = *(p + 1); + if (c >= 128) { + if (c <= (UChar )'\236' && /* upper */ + (flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0) { + if (c != (UChar )'\227') { + *lower++ = *p; + *lower = (UChar )(c + 32); + (*pp) += 2; + return 2; + } + } +#if 0 + else if (c == (UChar )'\237' && + (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { + *lower++ = '\303'; + *lower = '\237'; + (*pp) += 2; + return 2; + } +#endif + } + } + + len = enc_len(ONIG_ENCODING_UTF8, p); + if (lower != p) { + int i; + for (i = 0; i < len; i++) { + *lower++ = *p++; + } + } + (*pp) += len; + return len; /* return byte length of converted char to lower */ + } +} + +static int 
+utf8_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) +{ + const UChar* p = *pp; + + if (ONIGENC_IS_MBC_ASCII(p)) { + if (end > p + 1 && + (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 && + ((*p == 's' && *(p+1) == 's') || + ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && + (*p == 'S' && *(p+1) == 'S')))) { + (*pp) += 2; + return TRUE; + } + + (*pp)++; + if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) { + return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p); + } + } + else { + (*pp) += enc_len(ONIG_ENCODING_UTF8, p); + + if (*p == 195) { /* 195 == '\303' */ + int c = *(p + 1); + if (c >= 128) { + if ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0) { + if (c <= (UChar )'\236') { /* upper */ + if (c == (UChar )'\227') return FALSE; + return TRUE; + } + else if (c >= (UChar )'\240' && c <= (UChar )'\276') { /* lower */ + if (c == (UChar )'\267') return FALSE; + return TRUE; + } + } + else if (c == (UChar )'\237' && + (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { + return TRUE; + } + } + } + } + + return FALSE; +} + + +static OnigCodePoint EmptyRange[] = { 0 }; + +static OnigCodePoint SBAlnum[] = { + 3, + 0x0030, 0x0039, + 0x0041, 0x005a, + 0x0061, 0x007a +}; + +static OnigCodePoint MBAlnum[] = { +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + 411, +#else + 6, +#endif + 0x00aa, 0x00aa, + 0x00b5, 0x00b5, + 0x00ba, 0x00ba, + 0x00c0, 0x00d6, + 0x00d8, 0x00f6, + 0x00f8, 0x0236 +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + , + 0x0250, 0x02c1, + 0x02c6, 0x02d1, + 0x02e0, 0x02e4, + 0x02ee, 0x02ee, + 0x0300, 0x0357, + 0x035d, 0x036f, + 0x037a, 0x037a, + 0x0386, 0x0386, + 0x0388, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03ce, + 0x03d0, 0x03f5, + 0x03f7, 0x03fb, + 0x0400, 0x0481, + 0x0483, 0x0486, + 0x0488, 0x04ce, + 0x04d0, 0x04f5, + 0x04f8, 0x04f9, + 0x0500, 0x050f, + 0x0531, 0x0556, + 0x0559, 0x0559, + 0x0561, 0x0587, + 0x0591, 0x05a1, + 0x05a3, 0x05b9, + 0x05bb, 0x05bd, + 0x05bf, 0x05bf, + 0x05c1, 0x05c2, + 0x05c4, 0x05c4, + 0x05d0, 
0x05ea, + 0x05f0, 0x05f2, + 0x0610, 0x0615, + 0x0621, 0x063a, + 0x0640, 0x0658, + 0x0660, 0x0669, + 0x066e, 0x06d3, + 0x06d5, 0x06dc, + 0x06de, 0x06e8, + 0x06ea, 0x06fc, + 0x06ff, 0x06ff, + 0x0710, 0x074a, + 0x074d, 0x074f, + 0x0780, 0x07b1, + 0x0901, 0x0939, + 0x093c, 0x094d, + 0x0950, 0x0954, + 0x0958, 0x0963, + 0x0966, 0x096f, + 0x0981, 0x0983, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bc, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09cd, + 0x09d7, 0x09d7, + 0x09dc, 0x09dd, + 0x09df, 0x09e3, + 0x09e6, 0x09f1, + 0x0a01, 0x0a03, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a3c, 0x0a3c, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a66, 0x0a74, + 0x0a81, 0x0a83, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abc, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae3, + 0x0ae6, 0x0aef, + 0x0b01, 0x0b03, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3c, 0x0b43, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b66, 0x0b6f, + 0x0b71, 0x0b71, + 0x0b82, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb5, + 0x0bb7, 0x0bb9, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcd, + 0x0bd7, 0x0bd7, + 0x0be7, 0x0bef, + 0x0c01, 0x0c03, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3e, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c60, 0x0c61, + 0x0c66, 0x0c6f, + 0x0c82, 0x0c83, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbc, 0x0cc4, + 0x0cc6, 
0x0cc8, + 0x0cca, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0ce6, 0x0cef, + 0x0d02, 0x0d03, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3e, 0x0d43, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4d, + 0x0d57, 0x0d57, + 0x0d60, 0x0d61, + 0x0d66, 0x0d6f, + 0x0d82, 0x0d83, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0dca, 0x0dca, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df3, + 0x0e01, 0x0e3a, + 0x0e40, 0x0e4e, + 0x0e50, 0x0e59, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb9, + 0x0ebb, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0ec8, 0x0ecd, + 0x0ed0, 0x0ed9, + 0x0edc, 0x0edd, + 0x0f00, 0x0f00, + 0x0f18, 0x0f19, + 0x0f20, 0x0f29, + 0x0f35, 0x0f35, + 0x0f37, 0x0f37, + 0x0f39, 0x0f39, + 0x0f3e, 0x0f47, + 0x0f49, 0x0f6a, + 0x0f71, 0x0f84, + 0x0f86, 0x0f8b, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fc6, 0x0fc6, + 0x1000, 0x1021, + 0x1023, 0x1027, + 0x1029, 0x102a, + 0x102c, 0x1032, + 0x1036, 0x1039, + 0x1040, 0x1049, + 0x1050, 0x1059, + 0x10a0, 0x10c5, + 0x10d0, 0x10f8, + 0x1100, 0x1159, + 0x115f, 0x11a2, + 0x11a8, 0x11f9, + 0x1200, 0x1206, + 0x1208, 0x1246, + 0x1248, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1286, + 0x1288, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12ae, + 0x12b0, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12ce, + 0x12d0, 0x12d6, + 0x12d8, 0x12ee, + 0x12f0, 0x130e, + 0x1310, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x131e, + 0x1320, 0x1346, + 0x1348, 0x135a, + 0x1369, 0x1371, + 0x13a0, 0x13f4, + 0x1401, 0x166c, + 0x166f, 0x1676, + 0x1681, 0x169a, + 0x16a0, 0x16ea, + 0x1700, 0x170c, + 0x170e, 0x1714, + 0x1720, 0x1734, + 0x1740, 0x1753, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1772, 
0x1773, + 0x1780, 0x17b3, + 0x17b6, 0x17d3, + 0x17d7, 0x17d7, + 0x17dc, 0x17dd, + 0x17e0, 0x17e9, + 0x180b, 0x180d, + 0x1810, 0x1819, + 0x1820, 0x1877, + 0x1880, 0x18a9, + 0x1900, 0x191c, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x1946, 0x196d, + 0x1970, 0x1974, + 0x1d00, 0x1d6b, + 0x1e00, 0x1e9b, + 0x1ea0, 0x1ef9, + 0x1f00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fbc, + 0x1fbe, 0x1fbe, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fcc, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fe0, 0x1fec, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffc, + 0x2071, 0x2071, + 0x207f, 0x207f, + 0x20d0, 0x20ea, + 0x2102, 0x2102, + 0x2107, 0x2107, + 0x210a, 0x2113, + 0x2115, 0x2115, + 0x2119, 0x211d, + 0x2124, 0x2124, + 0x2126, 0x2126, + 0x2128, 0x2128, + 0x212a, 0x212d, + 0x212f, 0x2131, + 0x2133, 0x2139, + 0x213d, 0x213f, + 0x2145, 0x2149, + 0x3005, 0x3006, + 0x302a, 0x302f, + 0x3031, 0x3035, + 0x303b, 0x303c, + 0x3041, 0x3096, + 0x3099, 0x309a, + 0x309d, 0x309f, + 0x30a1, 0x30fa, + 0x30fc, 0x30ff, + 0x3105, 0x312c, + 0x3131, 0x318e, + 0x31a0, 0x31b7, + 0x31f0, 0x31ff, + 0x3400, 0x4db5, + 0x4e00, 0x9fa5, + 0xa000, 0xa48c, + 0xac00, 0xd7a3, + 0xf900, 0xfa2d, + 0xfa30, 0xfa6a, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb28, + 0xfb2a, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3d, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfb, + 0xfe00, 0xfe0f, + 0xfe20, 0xfe23, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xff10, 0xff19, + 0xff21, 0xff3a, + 0xff41, 0xff5a, + 0xff66, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10300, 0x1031e, + 0x10330, 0x10349, + 0x10380, 0x1039d, + 0x10400, 0x1049d, + 0x104a0, 0x104a9, + 0x10800, 0x10805, + 
0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x1083f, + 0x1d165, 0x1d169, + 0x1d16d, 0x1d172, + 0x1d17b, 0x1d182, + 0x1d185, 0x1d18b, + 0x1d1aa, 0x1d1ad, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a3, + 0x1d6a8, 0x1d6c0, + 0x1d6c2, 0x1d6da, + 0x1d6dc, 0x1d6fa, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d734, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d76e, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d7a8, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7c9, + 0x1d7ce, 0x1d7ff, + 0x20000, 0x2a6d6, + 0x2f800, 0x2fa1d, + 0xe0100, 0xe01ef +#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ +}; /* end of MBAlnum */ + +static OnigCodePoint SBAlpha[] = { + 2, + 0x0041, 0x005a, + 0x0061, 0x007a +}; + +static OnigCodePoint MBAlpha[] = { +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + 394, +#else + 6, +#endif + 0x00aa, 0x00aa, + 0x00b5, 0x00b5, + 0x00ba, 0x00ba, + 0x00c0, 0x00d6, + 0x00d8, 0x00f6, + 0x00f8, 0x0236 +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + , + 0x0250, 0x02c1, + 0x02c6, 0x02d1, + 0x02e0, 0x02e4, + 0x02ee, 0x02ee, + 0x0300, 0x0357, + 0x035d, 0x036f, + 0x037a, 0x037a, + 0x0386, 0x0386, + 0x0388, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03ce, + 0x03d0, 0x03f5, + 0x03f7, 0x03fb, + 0x0400, 0x0481, + 0x0483, 0x0486, + 0x0488, 0x04ce, + 0x04d0, 0x04f5, + 0x04f8, 0x04f9, + 0x0500, 0x050f, + 0x0531, 0x0556, + 0x0559, 0x0559, + 0x0561, 0x0587, + 0x0591, 0x05a1, + 0x05a3, 0x05b9, + 0x05bb, 0x05bd, + 0x05bf, 0x05bf, + 0x05c1, 0x05c2, + 0x05c4, 0x05c4, + 0x05d0, 0x05ea, + 0x05f0, 0x05f2, + 0x0610, 0x0615, + 0x0621, 0x063a, + 0x0640, 0x0658, + 0x066e, 0x06d3, + 0x06d5, 0x06dc, + 0x06de, 0x06e8, + 0x06ea, 0x06ef, + 0x06fa, 0x06fc, + 0x06ff, 0x06ff, + 
0x0710, 0x074a, + 0x074d, 0x074f, + 0x0780, 0x07b1, + 0x0901, 0x0939, + 0x093c, 0x094d, + 0x0950, 0x0954, + 0x0958, 0x0963, + 0x0981, 0x0983, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bc, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09cd, + 0x09d7, 0x09d7, + 0x09dc, 0x09dd, + 0x09df, 0x09e3, + 0x09f0, 0x09f1, + 0x0a01, 0x0a03, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a3c, 0x0a3c, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a70, 0x0a74, + 0x0a81, 0x0a83, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abc, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae3, + 0x0b01, 0x0b03, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3c, 0x0b43, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b71, 0x0b71, + 0x0b82, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb5, + 0x0bb7, 0x0bb9, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcd, + 0x0bd7, 0x0bd7, + 0x0c01, 0x0c03, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3e, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c60, 0x0c61, + 0x0c82, 0x0c83, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbc, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0d02, 0x0d03, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3e, 0x0d43, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4d, + 0x0d57, 0x0d57, + 0x0d60, 0x0d61, + 0x0d82, 0x0d83, + 
0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0dca, 0x0dca, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df3, + 0x0e01, 0x0e3a, + 0x0e40, 0x0e4e, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb9, + 0x0ebb, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0ec8, 0x0ecd, + 0x0edc, 0x0edd, + 0x0f00, 0x0f00, + 0x0f18, 0x0f19, + 0x0f35, 0x0f35, + 0x0f37, 0x0f37, + 0x0f39, 0x0f39, + 0x0f3e, 0x0f47, + 0x0f49, 0x0f6a, + 0x0f71, 0x0f84, + 0x0f86, 0x0f8b, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fc6, 0x0fc6, + 0x1000, 0x1021, + 0x1023, 0x1027, + 0x1029, 0x102a, + 0x102c, 0x1032, + 0x1036, 0x1039, + 0x1050, 0x1059, + 0x10a0, 0x10c5, + 0x10d0, 0x10f8, + 0x1100, 0x1159, + 0x115f, 0x11a2, + 0x11a8, 0x11f9, + 0x1200, 0x1206, + 0x1208, 0x1246, + 0x1248, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1286, + 0x1288, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12ae, + 0x12b0, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12ce, + 0x12d0, 0x12d6, + 0x12d8, 0x12ee, + 0x12f0, 0x130e, + 0x1310, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x131e, + 0x1320, 0x1346, + 0x1348, 0x135a, + 0x13a0, 0x13f4, + 0x1401, 0x166c, + 0x166f, 0x1676, + 0x1681, 0x169a, + 0x16a0, 0x16ea, + 0x1700, 0x170c, + 0x170e, 0x1714, + 0x1720, 0x1734, + 0x1740, 0x1753, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1772, 0x1773, + 0x1780, 0x17b3, + 0x17b6, 0x17d3, + 0x17d7, 0x17d7, + 0x17dc, 0x17dd, + 0x180b, 0x180d, + 0x1820, 0x1877, + 0x1880, 0x18a9, + 0x1900, 0x191c, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x1950, 0x196d, + 0x1970, 0x1974, + 0x1d00, 0x1d6b, + 0x1e00, 0x1e9b, + 0x1ea0, 0x1ef9, + 0x1f00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 
0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fbc, + 0x1fbe, 0x1fbe, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fcc, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fe0, 0x1fec, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffc, + 0x2071, 0x2071, + 0x207f, 0x207f, + 0x20d0, 0x20ea, + 0x2102, 0x2102, + 0x2107, 0x2107, + 0x210a, 0x2113, + 0x2115, 0x2115, + 0x2119, 0x211d, + 0x2124, 0x2124, + 0x2126, 0x2126, + 0x2128, 0x2128, + 0x212a, 0x212d, + 0x212f, 0x2131, + 0x2133, 0x2139, + 0x213d, 0x213f, + 0x2145, 0x2149, + 0x3005, 0x3006, + 0x302a, 0x302f, + 0x3031, 0x3035, + 0x303b, 0x303c, + 0x3041, 0x3096, + 0x3099, 0x309a, + 0x309d, 0x309f, + 0x30a1, 0x30fa, + 0x30fc, 0x30ff, + 0x3105, 0x312c, + 0x3131, 0x318e, + 0x31a0, 0x31b7, + 0x31f0, 0x31ff, + 0x3400, 0x4db5, + 0x4e00, 0x9fa5, + 0xa000, 0xa48c, + 0xac00, 0xd7a3, + 0xf900, 0xfa2d, + 0xfa30, 0xfa6a, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb28, + 0xfb2a, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3d, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfb, + 0xfe00, 0xfe0f, + 0xfe20, 0xfe23, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xff21, 0xff3a, + 0xff41, 0xff5a, + 0xff66, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10300, 0x1031e, + 0x10330, 0x10349, + 0x10380, 0x1039d, + 0x10400, 0x1049d, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x1083f, + 0x1d165, 0x1d169, + 0x1d16d, 0x1d172, + 0x1d17b, 0x1d182, + 0x1d185, 0x1d18b, + 0x1d1aa, 0x1d1ad, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 
0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a3, + 0x1d6a8, 0x1d6c0, + 0x1d6c2, 0x1d6da, + 0x1d6dc, 0x1d6fa, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d734, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d76e, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d7a8, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7c9, + 0x20000, 0x2a6d6, + 0x2f800, 0x2fa1d, + 0xe0100, 0xe01ef +#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ +}; /* end of MBAlpha */ + +static OnigCodePoint SBBlank[] = { + 2, + 0x0009, 0x0009, + 0x0020, 0x0020 +}; + +static OnigCodePoint MBBlank[] = { +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + 7, +#else + 1, +#endif + 0x00a0, 0x00a0 +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + , + 0x1680, 0x1680, + 0x180e, 0x180e, + 0x2000, 0x200a, + 0x202f, 0x202f, + 0x205f, 0x205f, + 0x3000, 0x3000 +#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ +}; /* end of MBBlank */ + +static OnigCodePoint SBCntrl[] = { + 2, + 0x0000, 0x001f, + 0x007f, 0x007f +}; + +static OnigCodePoint MBCntrl[] = { +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + 18, +#else + 2, +#endif + 0x0080, 0x009f, + 0x00ad, 0x00ad +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + , + 0x0600, 0x0603, + 0x06dd, 0x06dd, + 0x070f, 0x070f, + 0x17b4, 0x17b5, + 0x200b, 0x200f, + 0x202a, 0x202e, + 0x2060, 0x2063, + 0x206a, 0x206f, + 0xd800, 0xf8ff, + 0xfeff, 0xfeff, + 0xfff9, 0xfffb, + 0x1d173, 0x1d17a, + 0xe0001, 0xe0001, + 0xe0020, 0xe007f, + 0xf0000, 0xffffd, + 0x100000, 0x10fffd +#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ +}; /* end of MBCntrl */ + +static OnigCodePoint SBDigit[] = { + 1, + 0x0030, 0x0039 +}; + +static OnigCodePoint MBDigit[] = { +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + 22, +#else + 0 +#endif +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + 0x0660, 0x0669, + 0x06f0, 0x06f9, + 0x0966, 0x096f, + 0x09e6, 0x09ef, + 0x0a66, 0x0a6f, + 0x0ae6, 0x0aef, + 0x0b66, 0x0b6f, + 0x0be7, 0x0bef, + 0x0c66, 0x0c6f, + 0x0ce6, 0x0cef, + 0x0d66, 0x0d6f, + 0x0e50, 0x0e59, + 0x0ed0, 0x0ed9, + 0x0f20, 0x0f29, + 0x1040, 0x1049, + 0x1369, 0x1371, + 0x17e0, 0x17e9, + 
0x1810, 0x1819, + 0x1946, 0x194f, + 0xff10, 0xff19, + 0x104a0, 0x104a9, + 0x1d7ce, 0x1d7ff +#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ +}; /* end of MBDigit */ + +static OnigCodePoint SBGraph[] = { + 1, + 0x0021, 0x007e +}; + +static OnigCodePoint MBGraph[] = { +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + 404, +#else + 1, +#endif + 0x00a1, 0x0236 +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + , + 0x0250, 0x0357, + 0x035d, 0x036f, + 0x0374, 0x0375, + 0x037a, 0x037a, + 0x037e, 0x037e, + 0x0384, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03ce, + 0x03d0, 0x03fb, + 0x0400, 0x0486, + 0x0488, 0x04ce, + 0x04d0, 0x04f5, + 0x04f8, 0x04f9, + 0x0500, 0x050f, + 0x0531, 0x0556, + 0x0559, 0x055f, + 0x0561, 0x0587, + 0x0589, 0x058a, + 0x0591, 0x05a1, + 0x05a3, 0x05b9, + 0x05bb, 0x05c4, + 0x05d0, 0x05ea, + 0x05f0, 0x05f4, + 0x0600, 0x0603, + 0x060c, 0x0615, + 0x061b, 0x061b, + 0x061f, 0x061f, + 0x0621, 0x063a, + 0x0640, 0x0658, + 0x0660, 0x070d, + 0x070f, 0x074a, + 0x074d, 0x074f, + 0x0780, 0x07b1, + 0x0901, 0x0939, + 0x093c, 0x094d, + 0x0950, 0x0954, + 0x0958, 0x0970, + 0x0981, 0x0983, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bc, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09cd, + 0x09d7, 0x09d7, + 0x09dc, 0x09dd, + 0x09df, 0x09e3, + 0x09e6, 0x09fa, + 0x0a01, 0x0a03, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a3c, 0x0a3c, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a66, 0x0a74, + 0x0a81, 0x0a83, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abc, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae3, + 0x0ae6, 0x0aef, + 0x0af1, 0x0af1, + 0x0b01, 0x0b03, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3c, 0x0b43, + 0x0b47, 
0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b66, 0x0b71, + 0x0b82, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb5, + 0x0bb7, 0x0bb9, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcd, + 0x0bd7, 0x0bd7, + 0x0be7, 0x0bfa, + 0x0c01, 0x0c03, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3e, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c60, 0x0c61, + 0x0c66, 0x0c6f, + 0x0c82, 0x0c83, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbc, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0ce6, 0x0cef, + 0x0d02, 0x0d03, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3e, 0x0d43, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4d, + 0x0d57, 0x0d57, + 0x0d60, 0x0d61, + 0x0d66, 0x0d6f, + 0x0d82, 0x0d83, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0dca, 0x0dca, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df4, + 0x0e01, 0x0e3a, + 0x0e3f, 0x0e5b, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb9, + 0x0ebb, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0ec8, 0x0ecd, + 0x0ed0, 0x0ed9, + 0x0edc, 0x0edd, + 0x0f00, 0x0f47, + 0x0f49, 0x0f6a, + 0x0f71, 0x0f8b, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fbe, 0x0fcc, + 0x0fcf, 0x0fcf, + 0x1000, 0x1021, + 0x1023, 0x1027, + 0x1029, 0x102a, + 0x102c, 0x1032, + 0x1036, 0x1039, + 0x1040, 0x1059, + 0x10a0, 0x10c5, + 0x10d0, 0x10f8, + 0x10fb, 0x10fb, + 0x1100, 0x1159, + 0x115f, 0x11a2, + 0x11a8, 0x11f9, + 0x1200, 0x1206, + 0x1208, 0x1246, + 0x1248, 0x1248, + 0x124a, 
0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1286, + 0x1288, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12ae, + 0x12b0, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12ce, + 0x12d0, 0x12d6, + 0x12d8, 0x12ee, + 0x12f0, 0x130e, + 0x1310, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x131e, + 0x1320, 0x1346, + 0x1348, 0x135a, + 0x1361, 0x137c, + 0x13a0, 0x13f4, + 0x1401, 0x1676, + 0x1681, 0x169c, + 0x16a0, 0x16f0, + 0x1700, 0x170c, + 0x170e, 0x1714, + 0x1720, 0x1736, + 0x1740, 0x1753, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1772, 0x1773, + 0x1780, 0x17dd, + 0x17e0, 0x17e9, + 0x17f0, 0x17f9, + 0x1800, 0x180d, + 0x1810, 0x1819, + 0x1820, 0x1877, + 0x1880, 0x18a9, + 0x1900, 0x191c, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x1940, 0x1940, + 0x1944, 0x196d, + 0x1970, 0x1974, + 0x19e0, 0x19ff, + 0x1d00, 0x1d6b, + 0x1e00, 0x1e9b, + 0x1ea0, 0x1ef9, + 0x1f00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fc4, + 0x1fc6, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fdd, 0x1fef, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffe, + 0x200b, 0x2027, + 0x202a, 0x202e, + 0x2030, 0x2054, + 0x2057, 0x2057, + 0x2060, 0x2063, + 0x206a, 0x2071, + 0x2074, 0x208e, + 0x20a0, 0x20b1, + 0x20d0, 0x20ea, + 0x2100, 0x213b, + 0x213d, 0x214b, + 0x2153, 0x2183, + 0x2190, 0x23d0, + 0x2400, 0x2426, + 0x2440, 0x244a, + 0x2460, 0x2617, + 0x2619, 0x267d, + 0x2680, 0x2691, + 0x26a0, 0x26a1, + 0x2701, 0x2704, + 0x2706, 0x2709, + 0x270c, 0x2727, + 0x2729, 0x274b, + 0x274d, 0x274d, + 0x274f, 0x2752, + 0x2756, 0x2756, + 0x2758, 0x275e, + 0x2761, 0x2794, + 0x2798, 0x27af, + 0x27b1, 0x27be, + 0x27d0, 0x27eb, + 0x27f0, 0x2b0d, + 0x2e80, 0x2e99, + 0x2e9b, 0x2ef3, + 0x2f00, 0x2fd5, + 0x2ff0, 0x2ffb, + 0x3001, 0x303f, + 0x3041, 0x3096, + 0x3099, 0x30ff, + 0x3105, 0x312c, + 0x3131, 0x318e, + 0x3190, 0x31b7, + 0x31f0, 0x321e, + 0x3220, 0x3243, + 0x3250, 
0x327d, + 0x327f, 0x32fe, + 0x3300, 0x4db5, + 0x4dc0, 0x9fa5, + 0xa000, 0xa48c, + 0xa490, 0xa4c6, + 0xac00, 0xd7a3, + 0xe000, 0xfa2d, + 0xfa30, 0xfa6a, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3f, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfd, + 0xfe00, 0xfe0f, + 0xfe20, 0xfe23, + 0xfe30, 0xfe52, + 0xfe54, 0xfe66, + 0xfe68, 0xfe6b, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xfeff, 0xfeff, + 0xff01, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0xffe0, 0xffe6, + 0xffe8, 0xffee, + 0xfff9, 0xfffd, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10100, 0x10102, + 0x10107, 0x10133, + 0x10137, 0x1013f, + 0x10300, 0x1031e, + 0x10320, 0x10323, + 0x10330, 0x1034a, + 0x10380, 0x1039d, + 0x1039f, 0x1039f, + 0x10400, 0x1049d, + 0x104a0, 0x104a9, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x1083f, + 0x1d000, 0x1d0f5, + 0x1d100, 0x1d126, + 0x1d12a, 0x1d1dd, + 0x1d300, 0x1d356, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a3, + 0x1d6a8, 0x1d7c9, + 0x1d7ce, 0x1d7ff, + 0x20000, 0x2a6d6, + 0x2f800, 0x2fa1d, + 0xe0001, 0xe0001, + 0xe0020, 0xe007f, + 0xe0100, 0xe01ef, + 0xf0000, 0xffffd, + 0x100000, 0x10fffd +#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ +}; /* end of MBGraph */ + +static OnigCodePoint SBLower[] = { + 1, + 0x0061, 0x007a +}; + +static OnigCodePoint MBLower[] = { +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + 423, +#else + 5, +#endif + 
0x00aa, 0x00aa, + 0x00b5, 0x00b5, + 0x00ba, 0x00ba, + 0x00df, 0x00f6, + 0x00f8, 0x00ff +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + , + 0x0101, 0x0101, + 0x0103, 0x0103, + 0x0105, 0x0105, + 0x0107, 0x0107, + 0x0109, 0x0109, + 0x010b, 0x010b, + 0x010d, 0x010d, + 0x010f, 0x010f, + 0x0111, 0x0111, + 0x0113, 0x0113, + 0x0115, 0x0115, + 0x0117, 0x0117, + 0x0119, 0x0119, + 0x011b, 0x011b, + 0x011d, 0x011d, + 0x011f, 0x011f, + 0x0121, 0x0121, + 0x0123, 0x0123, + 0x0125, 0x0125, + 0x0127, 0x0127, + 0x0129, 0x0129, + 0x012b, 0x012b, + 0x012d, 0x012d, + 0x012f, 0x012f, + 0x0131, 0x0131, + 0x0133, 0x0133, + 0x0135, 0x0135, + 0x0137, 0x0138, + 0x013a, 0x013a, + 0x013c, 0x013c, + 0x013e, 0x013e, + 0x0140, 0x0140, + 0x0142, 0x0142, + 0x0144, 0x0144, + 0x0146, 0x0146, + 0x0148, 0x0149, + 0x014b, 0x014b, + 0x014d, 0x014d, + 0x014f, 0x014f, + 0x0151, 0x0151, + 0x0153, 0x0153, + 0x0155, 0x0155, + 0x0157, 0x0157, + 0x0159, 0x0159, + 0x015b, 0x015b, + 0x015d, 0x015d, + 0x015f, 0x015f, + 0x0161, 0x0161, + 0x0163, 0x0163, + 0x0165, 0x0165, + 0x0167, 0x0167, + 0x0169, 0x0169, + 0x016b, 0x016b, + 0x016d, 0x016d, + 0x016f, 0x016f, + 0x0171, 0x0171, + 0x0173, 0x0173, + 0x0175, 0x0175, + 0x0177, 0x0177, + 0x017a, 0x017a, + 0x017c, 0x017c, + 0x017e, 0x0180, + 0x0183, 0x0183, + 0x0185, 0x0185, + 0x0188, 0x0188, + 0x018c, 0x018d, + 0x0192, 0x0192, + 0x0195, 0x0195, + 0x0199, 0x019b, + 0x019e, 0x019e, + 0x01a1, 0x01a1, + 0x01a3, 0x01a3, + 0x01a5, 0x01a5, + 0x01a8, 0x01a8, + 0x01aa, 0x01ab, + 0x01ad, 0x01ad, + 0x01b0, 0x01b0, + 0x01b4, 0x01b4, + 0x01b6, 0x01b6, + 0x01b9, 0x01ba, + 0x01bd, 0x01bf, + 0x01c6, 0x01c6, + 0x01c9, 0x01c9, + 0x01cc, 0x01cc, + 0x01ce, 0x01ce, + 0x01d0, 0x01d0, + 0x01d2, 0x01d2, + 0x01d4, 0x01d4, + 0x01d6, 0x01d6, + 0x01d8, 0x01d8, + 0x01da, 0x01da, + 0x01dc, 0x01dd, + 0x01df, 0x01df, + 0x01e1, 0x01e1, + 0x01e3, 0x01e3, + 0x01e5, 0x01e5, + 0x01e7, 0x01e7, + 0x01e9, 0x01e9, + 0x01eb, 0x01eb, + 0x01ed, 0x01ed, + 0x01ef, 0x01f0, + 0x01f3, 0x01f3, + 0x01f5, 0x01f5, + 0x01f9, 0x01f9, 
+ 0x01fb, 0x01fb, + 0x01fd, 0x01fd, + 0x01ff, 0x01ff, + 0x0201, 0x0201, + 0x0203, 0x0203, + 0x0205, 0x0205, + 0x0207, 0x0207, + 0x0209, 0x0209, + 0x020b, 0x020b, + 0x020d, 0x020d, + 0x020f, 0x020f, + 0x0211, 0x0211, + 0x0213, 0x0213, + 0x0215, 0x0215, + 0x0217, 0x0217, + 0x0219, 0x0219, + 0x021b, 0x021b, + 0x021d, 0x021d, + 0x021f, 0x021f, + 0x0221, 0x0221, + 0x0223, 0x0223, + 0x0225, 0x0225, + 0x0227, 0x0227, + 0x0229, 0x0229, + 0x022b, 0x022b, + 0x022d, 0x022d, + 0x022f, 0x022f, + 0x0231, 0x0231, + 0x0233, 0x0236, + 0x0250, 0x02af, + 0x0390, 0x0390, + 0x03ac, 0x03ce, + 0x03d0, 0x03d1, + 0x03d5, 0x03d7, + 0x03d9, 0x03d9, + 0x03db, 0x03db, + 0x03dd, 0x03dd, + 0x03df, 0x03df, + 0x03e1, 0x03e1, + 0x03e3, 0x03e3, + 0x03e5, 0x03e5, + 0x03e7, 0x03e7, + 0x03e9, 0x03e9, + 0x03eb, 0x03eb, + 0x03ed, 0x03ed, + 0x03ef, 0x03f3, + 0x03f5, 0x03f5, + 0x03f8, 0x03f8, + 0x03fb, 0x03fb, + 0x0430, 0x045f, + 0x0461, 0x0461, + 0x0463, 0x0463, + 0x0465, 0x0465, + 0x0467, 0x0467, + 0x0469, 0x0469, + 0x046b, 0x046b, + 0x046d, 0x046d, + 0x046f, 0x046f, + 0x0471, 0x0471, + 0x0473, 0x0473, + 0x0475, 0x0475, + 0x0477, 0x0477, + 0x0479, 0x0479, + 0x047b, 0x047b, + 0x047d, 0x047d, + 0x047f, 0x047f, + 0x0481, 0x0481, + 0x048b, 0x048b, + 0x048d, 0x048d, + 0x048f, 0x048f, + 0x0491, 0x0491, + 0x0493, 0x0493, + 0x0495, 0x0495, + 0x0497, 0x0497, + 0x0499, 0x0499, + 0x049b, 0x049b, + 0x049d, 0x049d, + 0x049f, 0x049f, + 0x04a1, 0x04a1, + 0x04a3, 0x04a3, + 0x04a5, 0x04a5, + 0x04a7, 0x04a7, + 0x04a9, 0x04a9, + 0x04ab, 0x04ab, + 0x04ad, 0x04ad, + 0x04af, 0x04af, + 0x04b1, 0x04b1, + 0x04b3, 0x04b3, + 0x04b5, 0x04b5, + 0x04b7, 0x04b7, + 0x04b9, 0x04b9, + 0x04bb, 0x04bb, + 0x04bd, 0x04bd, + 0x04bf, 0x04bf, + 0x04c2, 0x04c2, + 0x04c4, 0x04c4, + 0x04c6, 0x04c6, + 0x04c8, 0x04c8, + 0x04ca, 0x04ca, + 0x04cc, 0x04cc, + 0x04ce, 0x04ce, + 0x04d1, 0x04d1, + 0x04d3, 0x04d3, + 0x04d5, 0x04d5, + 0x04d7, 0x04d7, + 0x04d9, 0x04d9, + 0x04db, 0x04db, + 0x04dd, 0x04dd, + 0x04df, 0x04df, + 0x04e1, 0x04e1, + 0x04e3, 0x04e3, + 
0x04e5, 0x04e5, + 0x04e7, 0x04e7, + 0x04e9, 0x04e9, + 0x04eb, 0x04eb, + 0x04ed, 0x04ed, + 0x04ef, 0x04ef, + 0x04f1, 0x04f1, + 0x04f3, 0x04f3, + 0x04f5, 0x04f5, + 0x04f9, 0x04f9, + 0x0501, 0x0501, + 0x0503, 0x0503, + 0x0505, 0x0505, + 0x0507, 0x0507, + 0x0509, 0x0509, + 0x050b, 0x050b, + 0x050d, 0x050d, + 0x050f, 0x050f, + 0x0561, 0x0587, + 0x1d00, 0x1d2b, + 0x1d62, 0x1d6b, + 0x1e01, 0x1e01, + 0x1e03, 0x1e03, + 0x1e05, 0x1e05, + 0x1e07, 0x1e07, + 0x1e09, 0x1e09, + 0x1e0b, 0x1e0b, + 0x1e0d, 0x1e0d, + 0x1e0f, 0x1e0f, + 0x1e11, 0x1e11, + 0x1e13, 0x1e13, + 0x1e15, 0x1e15, + 0x1e17, 0x1e17, + 0x1e19, 0x1e19, + 0x1e1b, 0x1e1b, + 0x1e1d, 0x1e1d, + 0x1e1f, 0x1e1f, + 0x1e21, 0x1e21, + 0x1e23, 0x1e23, + 0x1e25, 0x1e25, + 0x1e27, 0x1e27, + 0x1e29, 0x1e29, + 0x1e2b, 0x1e2b, + 0x1e2d, 0x1e2d, + 0x1e2f, 0x1e2f, + 0x1e31, 0x1e31, + 0x1e33, 0x1e33, + 0x1e35, 0x1e35, + 0x1e37, 0x1e37, + 0x1e39, 0x1e39, + 0x1e3b, 0x1e3b, + 0x1e3d, 0x1e3d, + 0x1e3f, 0x1e3f, + 0x1e41, 0x1e41, + 0x1e43, 0x1e43, + 0x1e45, 0x1e45, + 0x1e47, 0x1e47, + 0x1e49, 0x1e49, + 0x1e4b, 0x1e4b, + 0x1e4d, 0x1e4d, + 0x1e4f, 0x1e4f, + 0x1e51, 0x1e51, + 0x1e53, 0x1e53, + 0x1e55, 0x1e55, + 0x1e57, 0x1e57, + 0x1e59, 0x1e59, + 0x1e5b, 0x1e5b, + 0x1e5d, 0x1e5d, + 0x1e5f, 0x1e5f, + 0x1e61, 0x1e61, + 0x1e63, 0x1e63, + 0x1e65, 0x1e65, + 0x1e67, 0x1e67, + 0x1e69, 0x1e69, + 0x1e6b, 0x1e6b, + 0x1e6d, 0x1e6d, + 0x1e6f, 0x1e6f, + 0x1e71, 0x1e71, + 0x1e73, 0x1e73, + 0x1e75, 0x1e75, + 0x1e77, 0x1e77, + 0x1e79, 0x1e79, + 0x1e7b, 0x1e7b, + 0x1e7d, 0x1e7d, + 0x1e7f, 0x1e7f, + 0x1e81, 0x1e81, + 0x1e83, 0x1e83, + 0x1e85, 0x1e85, + 0x1e87, 0x1e87, + 0x1e89, 0x1e89, + 0x1e8b, 0x1e8b, + 0x1e8d, 0x1e8d, + 0x1e8f, 0x1e8f, + 0x1e91, 0x1e91, + 0x1e93, 0x1e93, + 0x1e95, 0x1e9b, + 0x1ea1, 0x1ea1, + 0x1ea3, 0x1ea3, + 0x1ea5, 0x1ea5, + 0x1ea7, 0x1ea7, + 0x1ea9, 0x1ea9, + 0x1eab, 0x1eab, + 0x1ead, 0x1ead, + 0x1eaf, 0x1eaf, + 0x1eb1, 0x1eb1, + 0x1eb3, 0x1eb3, + 0x1eb5, 0x1eb5, + 0x1eb7, 0x1eb7, + 0x1eb9, 0x1eb9, + 0x1ebb, 0x1ebb, + 0x1ebd, 0x1ebd, + 
0x1ebf, 0x1ebf, + 0x1ec1, 0x1ec1, + 0x1ec3, 0x1ec3, + 0x1ec5, 0x1ec5, + 0x1ec7, 0x1ec7, + 0x1ec9, 0x1ec9, + 0x1ecb, 0x1ecb, + 0x1ecd, 0x1ecd, + 0x1ecf, 0x1ecf, + 0x1ed1, 0x1ed1, + 0x1ed3, 0x1ed3, + 0x1ed5, 0x1ed5, + 0x1ed7, 0x1ed7, + 0x1ed9, 0x1ed9, + 0x1edb, 0x1edb, + 0x1edd, 0x1edd, + 0x1edf, 0x1edf, + 0x1ee1, 0x1ee1, + 0x1ee3, 0x1ee3, + 0x1ee5, 0x1ee5, + 0x1ee7, 0x1ee7, + 0x1ee9, 0x1ee9, + 0x1eeb, 0x1eeb, + 0x1eed, 0x1eed, + 0x1eef, 0x1eef, + 0x1ef1, 0x1ef1, + 0x1ef3, 0x1ef3, + 0x1ef5, 0x1ef5, + 0x1ef7, 0x1ef7, + 0x1ef9, 0x1ef9, + 0x1f00, 0x1f07, + 0x1f10, 0x1f15, + 0x1f20, 0x1f27, + 0x1f30, 0x1f37, + 0x1f40, 0x1f45, + 0x1f50, 0x1f57, + 0x1f60, 0x1f67, + 0x1f70, 0x1f7d, + 0x1f80, 0x1f87, + 0x1f90, 0x1f97, + 0x1fa0, 0x1fa7, + 0x1fb0, 0x1fb4, + 0x1fb6, 0x1fb7, + 0x1fbe, 0x1fbe, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fc7, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fd7, + 0x1fe0, 0x1fe7, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ff7, + 0x2071, 0x2071, + 0x207f, 0x207f, + 0x210a, 0x210a, + 0x210e, 0x210f, + 0x2113, 0x2113, + 0x212f, 0x212f, + 0x2134, 0x2134, + 0x2139, 0x2139, + 0x213d, 0x213d, + 0x2146, 0x2149, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xff41, 0xff5a, + 0x10428, 0x1044f, + 0x1d41a, 0x1d433, + 0x1d44e, 0x1d454, + 0x1d456, 0x1d467, + 0x1d482, 0x1d49b, + 0x1d4b6, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d4cf, + 0x1d4ea, 0x1d503, + 0x1d51e, 0x1d537, + 0x1d552, 0x1d56b, + 0x1d586, 0x1d59f, + 0x1d5ba, 0x1d5d3, + 0x1d5ee, 0x1d607, + 0x1d622, 0x1d63b, + 0x1d656, 0x1d66f, + 0x1d68a, 0x1d6a3, + 0x1d6c2, 0x1d6da, + 0x1d6dc, 0x1d6e1, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d71b, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d755, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d78f, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7c9 +#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ +}; /* end of MBLower */ + +static OnigCodePoint SBPrint[] = { + 2, + 0x0009, 0x000d, + 0x0020, 0x007e +}; + +static OnigCodePoint MBPrint[] = { +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + 403, +#else + 2, +#endif + 0x0085, 0x0085, + 0x00a0, 0x0236 
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE + , + 0x0250, 0x0357, + 0x035d, 0x036f, + 0x0374, 0x0375, + 0x037a, 0x037a, + 0x037e, 0x037e, + 0x0384, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03ce, + 0x03d0, 0x03fb, + 0x0400, 0x0486, + 0x0488, 0x04ce, + 0x04d0, 0x04f5, + 0x04f8, 0x04f9, + 0x0500, 0x050f, + 0x0531, 0x0556, + 0x0559, 0x055f, + 0x0561, 0x0587, + 0x0589, 0x058a, + 0x0591, 0x05a1, + 0x05a3, 0x05b9, + 0x05bb, 0x05c4, + 0x05d0, 0x05ea, + 0x05f0, 0x05f4, + 0x0600, 0x0603, + 0x060c, 0x0615, + 0x061b, 0x061b, + 0x061f, 0x061f, + 0x0621, 0x063a, + 0x0640, 0x0658, + 0x0660, 0x070d, + 0x070f, 0x074a, + 0x074d, 0x074f, + 0x0780, 0x07b1, + 0x0901, 0x0939, + 0x093c, 0x094d, + 0x0950, 0x0954, + 0x0958, 0x0970, + 0x0981, 0x0983, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bc, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09cd, + 0x09d7, 0x09d7, + 0x09dc, 0x09dd, + 0x09df, 0x09e3, + 0x09e6, 0x09fa, + 0x0a01, 0x0a03, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a3c, 0x0a3c, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a66, 0x0a74, + 0x0a81, 0x0a83, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abc, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae3, + 0x0ae6, 0x0aef, + 0x0af1, 0x0af1, + 0x0b01, 0x0b03, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3c, 0x0b43, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b66, 0x0b71, + 0x0b82, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb5, + 0x0bb7, 0x0bb9, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 
0x0bcd, + 0x0bd7, 0x0bd7, + 0x0be7, 0x0bfa, + 0x0c01, 0x0c03, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3e, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c60, 0x0c61, + 0x0c66, 0x0c6f, + 0x0c82, 0x0c83, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbc, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0ce6, 0x0cef, + 0x0d02, 0x0d03, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3e, 0x0d43, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4d, + 0x0d57, 0x0d57, + 0x0d60, 0x0d61, + 0x0d66, 0x0d6f, + 0x0d82, 0x0d83, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0dca, 0x0dca, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df4, + 0x0e01, 0x0e3a, + 0x0e3f, 0x0e5b, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb9, + 0x0ebb, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0ec8, 0x0ecd, + 0x0ed0, 0x0ed9, + 0x0edc, 0x0edd, + 0x0f00, 0x0f47, + 0x0f49, 0x0f6a, + 0x0f71, 0x0f8b, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fbe, 0x0fcc, + 0x0fcf, 0x0fcf, + 0x1000, 0x1021, + 0x1023, 0x1027, + 0x1029, 0x102a, + 0x102c, 0x1032, + 0x1036, 0x1039, + 0x1040, 0x1059, + 0x10a0, 0x10c5, + 0x10d0, 0x10f8, + 0x10fb, 0x10fb, + 0x1100, 0x1159, + 0x115f, 0x11a2, + 0x11a8, 0x11f9, + 0x1200, 0x1206, + 0x1208, 0x1246, + 0x1248, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1286, + 0x1288, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12ae, + 0x12b0, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12ce, + 0x12d0, 0x12d6, + 0x12d8, 0x12ee, + 0x12f0, 0x130e, + 0x1310, 0x1310, + 0x1312, 0x1315, + 0x1318, 
0x131e, + 0x1320, 0x1346, + 0x1348, 0x135a, + 0x1361, 0x137c, + 0x13a0, 0x13f4, + 0x1401, 0x1676, + 0x1680, 0x169c, + 0x16a0, 0x16f0, + 0x1700, 0x170c, + 0x170e, 0x1714, + 0x1720, 0x1736, + 0x1740, 0x1753, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1772, 0x1773, + 0x1780, 0x17dd, + 0x17e0, 0x17e9, + 0x17f0, 0x17f9, + 0x1800, 0x180e, + 0x1810, 0x1819, + 0x1820, 0x1877, + 0x1880, 0x18a9, + 0x1900, 0x191c, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x1940, 0x1940, + 0x1944, 0x196d, + 0x1970, 0x1974, + 0x19e0, 0x19ff, + 0x1d00, 0x1d6b, + 0x1e00, 0x1e9b, + 0x1ea0, 0x1ef9, + 0x1f00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fc4, + 0x1fc6, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fdd, 0x1fef, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffe, + 0x2000, 0x2054, + 0x2057, 0x2057, + 0x205f, 0x2063, + 0x206a, 0x2071, + 0x2074, 0x208e, + 0x20a0, 0x20b1, + 0x20d0, 0x20ea, + 0x2100, 0x213b, + 0x213d, 0x214b, + 0x2153, 0x2183, + 0x2190, 0x23d0, + 0x2400, 0x2426, + 0x2440, 0x244a, + 0x2460, 0x2617, + 0x2619, 0x267d, + 0x2680, 0x2691, + 0x26a0, 0x26a1, + 0x2701, 0x2704, + 0x2706, 0x2709, + 0x270c, 0x2727, + 0x2729, 0x274b, + 0x274d, 0x274d, + 0x274f, 0x2752, + 0x2756, 0x2756, + 0x2758, 0x275e, + 0x2761, 0x2794, + 0x2798, 0x27af, + 0x27b1, 0x27be, + 0x27d0, 0x27eb, + 0x27f0, 0x2b0d, + 0x2e80, 0x2e99, + 0x2e9b, 0x2ef3, + 0x2f00, 0x2fd5, + 0x2ff0, 0x2ffb, + 0x3000, 0x303f, + 0x3041, 0x3096, + 0x3099, 0x30ff, + 0x3105, 0x312c, + 0x3131, 0x318e, + 0x3190, 0x31b7, + 0x31f0, 0x321e, + 0x3220, 0x3243, + 0x3250, 0x327d, + 0x327f, 0x32fe, + 0x3300, 0x4db5, + 0x4dc0, 0x9fa5, + 0xa000, 0xa48c, + 0xa490, 0xa4c6, + 0xac00, 0xd7a3, + 0xe000, 0xfa2d, + 0xfa30, 0xfa6a, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3f, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfd, + 0xfe00, 
0xfe0f, + 0xfe20, 0xfe23, + 0xfe30, 0xfe52, + 0xfe54, 0xfe66, + 0xfe68, 0xfe6b, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xfeff, 0xfeff, + 0xff01, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0xffe0, 0xffe6, + 0xffe8, 0xffee, + 0xfff9, 0xfffd, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10100, 0x10102, + 0x10107, 0x10133, + 0x10137, 0x1013f, + 0x10300, 0x1031e, + 0x10320, 0x10323, + 0x10330, 0x1034a, + 0x10380, 0x1039d, + 0x1039f, 0x1039f, + 0x10400, 0x1049d, + 0x104a0, 0x104a9, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x1083f, + 0x1d000, 0x1d0f5, + 0x1d100, 0x1d126, + 0x1d12a, 0x1d1dd, + 0x1d300, 0x1d356, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a3, + 0x1d6a8, 0x1d7c9, + 0x1d7ce, 0x1d7ff, + 0x20000, 0x2a6d6, + 0x2f800, 0x2fa1d, + 0xe0001, 0xe0001, + 0xe0020, 0xe007f, + 0xe0100, 0xe01ef, + 0xf0000, 0xffffd, + 0x100000, 0x10fffd +#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ +}; /* end of MBPrint */ + +static OnigCodePoint SBPunct[] = { + 9, + 0x0021, 0x0023, + 0x0025, 0x002a, + 0x002c, 0x002f, + 0x003a, 0x003b, + 0x003f, 0x0040, + 0x005b, 0x005d, + 0x005f, 0x005f, + 0x007b, 0x007b, + 0x007d, 0x007d +}; /* end of SBPunct */ + +static OnigCodePoint MBPunct[] = { +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + 77, +#else + 5, +#endif + 0x00a1, 0x00a1, + 0x00ab, 0x00ab, + 0x00b7, 0x00b7, + 0x00bb, 0x00bb, + 0x00bf, 0x00bf +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + , + 0x037e, 0x037e, + 0x0387, 0x0387, + 0x055a, 0x055f, + 0x0589, 0x058a, + 0x05be, 0x05be, + 
0x05c0, 0x05c0, + 0x05c3, 0x05c3, + 0x05f3, 0x05f4, + 0x060c, 0x060d, + 0x061b, 0x061b, + 0x061f, 0x061f, + 0x066a, 0x066d, + 0x06d4, 0x06d4, + 0x0700, 0x070d, + 0x0964, 0x0965, + 0x0970, 0x0970, + 0x0df4, 0x0df4, + 0x0e4f, 0x0e4f, + 0x0e5a, 0x0e5b, + 0x0f04, 0x0f12, + 0x0f3a, 0x0f3d, + 0x0f85, 0x0f85, + 0x104a, 0x104f, + 0x10fb, 0x10fb, + 0x1361, 0x1368, + 0x166d, 0x166e, + 0x169b, 0x169c, + 0x16eb, 0x16ed, + 0x1735, 0x1736, + 0x17d4, 0x17d6, + 0x17d8, 0x17da, + 0x1800, 0x180a, + 0x1944, 0x1945, + 0x2010, 0x2027, + 0x2030, 0x2043, + 0x2045, 0x2051, + 0x2053, 0x2054, + 0x2057, 0x2057, + 0x207d, 0x207e, + 0x208d, 0x208e, + 0x2329, 0x232a, + 0x23b4, 0x23b6, + 0x2768, 0x2775, + 0x27e6, 0x27eb, + 0x2983, 0x2998, + 0x29d8, 0x29db, + 0x29fc, 0x29fd, + 0x3001, 0x3003, + 0x3008, 0x3011, + 0x3014, 0x301f, + 0x3030, 0x3030, + 0x303d, 0x303d, + 0x30a0, 0x30a0, + 0x30fb, 0x30fb, + 0xfd3e, 0xfd3f, + 0xfe30, 0xfe52, + 0xfe54, 0xfe61, + 0xfe63, 0xfe63, + 0xfe68, 0xfe68, + 0xfe6a, 0xfe6b, + 0xff01, 0xff03, + 0xff05, 0xff0a, + 0xff0c, 0xff0f, + 0xff1a, 0xff1b, + 0xff1f, 0xff20, + 0xff3b, 0xff3d, + 0xff3f, 0xff3f, + 0xff5b, 0xff5b, + 0xff5d, 0xff5d, + 0xff5f, 0xff65, + 0x10100, 0x10101, + 0x1039f, 0x1039f +#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ +}; /* end of MBPunct */ + +static OnigCodePoint SBSpace[] = { + 2, + 0x0009, 0x000d, + 0x0020, 0x0020 +}; + +static OnigCodePoint MBSpace[] = { +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + 9, +#else + 2, +#endif + 0x0085, 0x0085, + 0x00a0, 0x00a0 +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + , + 0x1680, 0x1680, + 0x180e, 0x180e, + 0x2000, 0x200a, + 0x2028, 0x2029, + 0x202f, 0x202f, + 0x205f, 0x205f, + 0x3000, 0x3000 +#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ +}; /* end of MBSpace */ + +static OnigCodePoint SBUpper[] = { + 1, + 0x0041, 0x005a +}; + +static OnigCodePoint MBUpper[] = { +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + 420, +#else + 2, +#endif + 0x00c0, 0x00d6, + 0x00d8, 0x00de +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + , + 0x0100, 0x0100, + 0x0102, 
0x0102, + 0x0104, 0x0104, + 0x0106, 0x0106, + 0x0108, 0x0108, + 0x010a, 0x010a, + 0x010c, 0x010c, + 0x010e, 0x010e, + 0x0110, 0x0110, + 0x0112, 0x0112, + 0x0114, 0x0114, + 0x0116, 0x0116, + 0x0118, 0x0118, + 0x011a, 0x011a, + 0x011c, 0x011c, + 0x011e, 0x011e, + 0x0120, 0x0120, + 0x0122, 0x0122, + 0x0124, 0x0124, + 0x0126, 0x0126, + 0x0128, 0x0128, + 0x012a, 0x012a, + 0x012c, 0x012c, + 0x012e, 0x012e, + 0x0130, 0x0130, + 0x0132, 0x0132, + 0x0134, 0x0134, + 0x0136, 0x0136, + 0x0139, 0x0139, + 0x013b, 0x013b, + 0x013d, 0x013d, + 0x013f, 0x013f, + 0x0141, 0x0141, + 0x0143, 0x0143, + 0x0145, 0x0145, + 0x0147, 0x0147, + 0x014a, 0x014a, + 0x014c, 0x014c, + 0x014e, 0x014e, + 0x0150, 0x0150, + 0x0152, 0x0152, + 0x0154, 0x0154, + 0x0156, 0x0156, + 0x0158, 0x0158, + 0x015a, 0x015a, + 0x015c, 0x015c, + 0x015e, 0x015e, + 0x0160, 0x0160, + 0x0162, 0x0162, + 0x0164, 0x0164, + 0x0166, 0x0166, + 0x0168, 0x0168, + 0x016a, 0x016a, + 0x016c, 0x016c, + 0x016e, 0x016e, + 0x0170, 0x0170, + 0x0172, 0x0172, + 0x0174, 0x0174, + 0x0176, 0x0176, + 0x0178, 0x0179, + 0x017b, 0x017b, + 0x017d, 0x017d, + 0x0181, 0x0182, + 0x0184, 0x0184, + 0x0186, 0x0187, + 0x0189, 0x018b, + 0x018e, 0x0191, + 0x0193, 0x0194, + 0x0196, 0x0198, + 0x019c, 0x019d, + 0x019f, 0x01a0, + 0x01a2, 0x01a2, + 0x01a4, 0x01a4, + 0x01a6, 0x01a7, + 0x01a9, 0x01a9, + 0x01ac, 0x01ac, + 0x01ae, 0x01af, + 0x01b1, 0x01b3, + 0x01b5, 0x01b5, + 0x01b7, 0x01b8, + 0x01bc, 0x01bc, + 0x01c4, 0x01c4, + 0x01c7, 0x01c7, + 0x01ca, 0x01ca, + 0x01cd, 0x01cd, + 0x01cf, 0x01cf, + 0x01d1, 0x01d1, + 0x01d3, 0x01d3, + 0x01d5, 0x01d5, + 0x01d7, 0x01d7, + 0x01d9, 0x01d9, + 0x01db, 0x01db, + 0x01de, 0x01de, + 0x01e0, 0x01e0, + 0x01e2, 0x01e2, + 0x01e4, 0x01e4, + 0x01e6, 0x01e6, + 0x01e8, 0x01e8, + 0x01ea, 0x01ea, + 0x01ec, 0x01ec, + 0x01ee, 0x01ee, + 0x01f1, 0x01f1, + 0x01f4, 0x01f4, + 0x01f6, 0x01f8, + 0x01fa, 0x01fa, + 0x01fc, 0x01fc, + 0x01fe, 0x01fe, + 0x0200, 0x0200, + 0x0202, 0x0202, + 0x0204, 0x0204, + 0x0206, 0x0206, + 0x0208, 0x0208, + 0x020a, 
0x020a, + 0x020c, 0x020c, + 0x020e, 0x020e, + 0x0210, 0x0210, + 0x0212, 0x0212, + 0x0214, 0x0214, + 0x0216, 0x0216, + 0x0218, 0x0218, + 0x021a, 0x021a, + 0x021c, 0x021c, + 0x021e, 0x021e, + 0x0220, 0x0220, + 0x0222, 0x0222, + 0x0224, 0x0224, + 0x0226, 0x0226, + 0x0228, 0x0228, + 0x022a, 0x022a, + 0x022c, 0x022c, + 0x022e, 0x022e, + 0x0230, 0x0230, + 0x0232, 0x0232, + 0x0386, 0x0386, + 0x0388, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x038f, + 0x0391, 0x03a1, + 0x03a3, 0x03ab, + 0x03d2, 0x03d4, + 0x03d8, 0x03d8, + 0x03da, 0x03da, + 0x03dc, 0x03dc, + 0x03de, 0x03de, + 0x03e0, 0x03e0, + 0x03e2, 0x03e2, + 0x03e4, 0x03e4, + 0x03e6, 0x03e6, + 0x03e8, 0x03e8, + 0x03ea, 0x03ea, + 0x03ec, 0x03ec, + 0x03ee, 0x03ee, + 0x03f4, 0x03f4, + 0x03f7, 0x03f7, + 0x03f9, 0x03fa, + 0x0400, 0x042f, + 0x0460, 0x0460, + 0x0462, 0x0462, + 0x0464, 0x0464, + 0x0466, 0x0466, + 0x0468, 0x0468, + 0x046a, 0x046a, + 0x046c, 0x046c, + 0x046e, 0x046e, + 0x0470, 0x0470, + 0x0472, 0x0472, + 0x0474, 0x0474, + 0x0476, 0x0476, + 0x0478, 0x0478, + 0x047a, 0x047a, + 0x047c, 0x047c, + 0x047e, 0x047e, + 0x0480, 0x0480, + 0x048a, 0x048a, + 0x048c, 0x048c, + 0x048e, 0x048e, + 0x0490, 0x0490, + 0x0492, 0x0492, + 0x0494, 0x0494, + 0x0496, 0x0496, + 0x0498, 0x0498, + 0x049a, 0x049a, + 0x049c, 0x049c, + 0x049e, 0x049e, + 0x04a0, 0x04a0, + 0x04a2, 0x04a2, + 0x04a4, 0x04a4, + 0x04a6, 0x04a6, + 0x04a8, 0x04a8, + 0x04aa, 0x04aa, + 0x04ac, 0x04ac, + 0x04ae, 0x04ae, + 0x04b0, 0x04b0, + 0x04b2, 0x04b2, + 0x04b4, 0x04b4, + 0x04b6, 0x04b6, + 0x04b8, 0x04b8, + 0x04ba, 0x04ba, + 0x04bc, 0x04bc, + 0x04be, 0x04be, + 0x04c0, 0x04c1, + 0x04c3, 0x04c3, + 0x04c5, 0x04c5, + 0x04c7, 0x04c7, + 0x04c9, 0x04c9, + 0x04cb, 0x04cb, + 0x04cd, 0x04cd, + 0x04d0, 0x04d0, + 0x04d2, 0x04d2, + 0x04d4, 0x04d4, + 0x04d6, 0x04d6, + 0x04d8, 0x04d8, + 0x04da, 0x04da, + 0x04dc, 0x04dc, + 0x04de, 0x04de, + 0x04e0, 0x04e0, + 0x04e2, 0x04e2, + 0x04e4, 0x04e4, + 0x04e6, 0x04e6, + 0x04e8, 0x04e8, + 0x04ea, 0x04ea, + 0x04ec, 0x04ec, + 0x04ee, 0x04ee, + 0x04f0, 
0x04f0, + 0x04f2, 0x04f2, + 0x04f4, 0x04f4, + 0x04f8, 0x04f8, + 0x0500, 0x0500, + 0x0502, 0x0502, + 0x0504, 0x0504, + 0x0506, 0x0506, + 0x0508, 0x0508, + 0x050a, 0x050a, + 0x050c, 0x050c, + 0x050e, 0x050e, + 0x0531, 0x0556, + 0x10a0, 0x10c5, + 0x1e00, 0x1e00, + 0x1e02, 0x1e02, + 0x1e04, 0x1e04, + 0x1e06, 0x1e06, + 0x1e08, 0x1e08, + 0x1e0a, 0x1e0a, + 0x1e0c, 0x1e0c, + 0x1e0e, 0x1e0e, + 0x1e10, 0x1e10, + 0x1e12, 0x1e12, + 0x1e14, 0x1e14, + 0x1e16, 0x1e16, + 0x1e18, 0x1e18, + 0x1e1a, 0x1e1a, + 0x1e1c, 0x1e1c, + 0x1e1e, 0x1e1e, + 0x1e20, 0x1e20, + 0x1e22, 0x1e22, + 0x1e24, 0x1e24, + 0x1e26, 0x1e26, + 0x1e28, 0x1e28, + 0x1e2a, 0x1e2a, + 0x1e2c, 0x1e2c, + 0x1e2e, 0x1e2e, + 0x1e30, 0x1e30, + 0x1e32, 0x1e32, + 0x1e34, 0x1e34, + 0x1e36, 0x1e36, + 0x1e38, 0x1e38, + 0x1e3a, 0x1e3a, + 0x1e3c, 0x1e3c, + 0x1e3e, 0x1e3e, + 0x1e40, 0x1e40, + 0x1e42, 0x1e42, + 0x1e44, 0x1e44, + 0x1e46, 0x1e46, + 0x1e48, 0x1e48, + 0x1e4a, 0x1e4a, + 0x1e4c, 0x1e4c, + 0x1e4e, 0x1e4e, + 0x1e50, 0x1e50, + 0x1e52, 0x1e52, + 0x1e54, 0x1e54, + 0x1e56, 0x1e56, + 0x1e58, 0x1e58, + 0x1e5a, 0x1e5a, + 0x1e5c, 0x1e5c, + 0x1e5e, 0x1e5e, + 0x1e60, 0x1e60, + 0x1e62, 0x1e62, + 0x1e64, 0x1e64, + 0x1e66, 0x1e66, + 0x1e68, 0x1e68, + 0x1e6a, 0x1e6a, + 0x1e6c, 0x1e6c, + 0x1e6e, 0x1e6e, + 0x1e70, 0x1e70, + 0x1e72, 0x1e72, + 0x1e74, 0x1e74, + 0x1e76, 0x1e76, + 0x1e78, 0x1e78, + 0x1e7a, 0x1e7a, + 0x1e7c, 0x1e7c, + 0x1e7e, 0x1e7e, + 0x1e80, 0x1e80, + 0x1e82, 0x1e82, + 0x1e84, 0x1e84, + 0x1e86, 0x1e86, + 0x1e88, 0x1e88, + 0x1e8a, 0x1e8a, + 0x1e8c, 0x1e8c, + 0x1e8e, 0x1e8e, + 0x1e90, 0x1e90, + 0x1e92, 0x1e92, + 0x1e94, 0x1e94, + 0x1ea0, 0x1ea0, + 0x1ea2, 0x1ea2, + 0x1ea4, 0x1ea4, + 0x1ea6, 0x1ea6, + 0x1ea8, 0x1ea8, + 0x1eaa, 0x1eaa, + 0x1eac, 0x1eac, + 0x1eae, 0x1eae, + 0x1eb0, 0x1eb0, + 0x1eb2, 0x1eb2, + 0x1eb4, 0x1eb4, + 0x1eb6, 0x1eb6, + 0x1eb8, 0x1eb8, + 0x1eba, 0x1eba, + 0x1ebc, 0x1ebc, + 0x1ebe, 0x1ebe, + 0x1ec0, 0x1ec0, + 0x1ec2, 0x1ec2, + 0x1ec4, 0x1ec4, + 0x1ec6, 0x1ec6, + 0x1ec8, 0x1ec8, + 0x1eca, 0x1eca, + 0x1ecc, 
0x1ecc, + 0x1ece, 0x1ece, + 0x1ed0, 0x1ed0, + 0x1ed2, 0x1ed2, + 0x1ed4, 0x1ed4, + 0x1ed6, 0x1ed6, + 0x1ed8, 0x1ed8, + 0x1eda, 0x1eda, + 0x1edc, 0x1edc, + 0x1ede, 0x1ede, + 0x1ee0, 0x1ee0, + 0x1ee2, 0x1ee2, + 0x1ee4, 0x1ee4, + 0x1ee6, 0x1ee6, + 0x1ee8, 0x1ee8, + 0x1eea, 0x1eea, + 0x1eec, 0x1eec, + 0x1eee, 0x1eee, + 0x1ef0, 0x1ef0, + 0x1ef2, 0x1ef2, + 0x1ef4, 0x1ef4, + 0x1ef6, 0x1ef6, + 0x1ef8, 0x1ef8, + 0x1f08, 0x1f0f, + 0x1f18, 0x1f1d, + 0x1f28, 0x1f2f, + 0x1f38, 0x1f3f, + 0x1f48, 0x1f4d, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f5f, + 0x1f68, 0x1f6f, + 0x1fb8, 0x1fbb, + 0x1fc8, 0x1fcb, + 0x1fd8, 0x1fdb, + 0x1fe8, 0x1fec, + 0x1ff8, 0x1ffb, + 0x2102, 0x2102, + 0x2107, 0x2107, + 0x210b, 0x210d, + 0x2110, 0x2112, + 0x2115, 0x2115, + 0x2119, 0x211d, + 0x2124, 0x2124, + 0x2126, 0x2126, + 0x2128, 0x2128, + 0x212a, 0x212d, + 0x2130, 0x2131, + 0x2133, 0x2133, + 0x213e, 0x213f, + 0x2145, 0x2145, + 0xff21, 0xff3a, + 0x10400, 0x10427, + 0x1d400, 0x1d419, + 0x1d434, 0x1d44d, + 0x1d468, 0x1d481, + 0x1d49c, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b5, + 0x1d4d0, 0x1d4e9, + 0x1d504, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d538, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d56c, 0x1d585, + 0x1d5a0, 0x1d5b9, + 0x1d5d4, 0x1d5ed, + 0x1d608, 0x1d621, + 0x1d63c, 0x1d655, + 0x1d670, 0x1d689, + 0x1d6a8, 0x1d6c0, + 0x1d6e2, 0x1d6fa, + 0x1d71c, 0x1d734, + 0x1d756, 0x1d76e, + 0x1d790, 0x1d7a8 +#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ +}; /* end of MBUpper */ + +static OnigCodePoint SBXDigit[] = { + 3, + 0x0030, 0x0039, + 0x0041, 0x0046, + 0x0061, 0x0066 +}; + +static OnigCodePoint SBASCII[] = { + 1, + 0x0000, 0x007f +}; + +static OnigCodePoint SBWord[] = { + 4, + 0x0030, 0x0039, + 0x0041, 0x005a, + 0x005f, 0x005f, + 0x0061, 0x007a +}; + +static OnigCodePoint MBWord[] = { +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + 432, 
+#else + 8, +#endif + 0x00aa, 0x00aa, + 0x00b2, 0x00b3, + 0x00b5, 0x00b5, + 0x00b9, 0x00ba, + 0x00bc, 0x00be, + 0x00c0, 0x00d6, + 0x00d8, 0x00f6, +#ifndef USE_UNICODE_FULL_RANGE_CTYPE + 0x00f8, 0x7fffffff +#else /* not USE_UNICODE_FULL_RANGE_CTYPE */ + 0x00f8, 0x0236, + 0x0250, 0x02c1, + 0x02c6, 0x02d1, + 0x02e0, 0x02e4, + 0x02ee, 0x02ee, + 0x0300, 0x0357, + 0x035d, 0x036f, + 0x037a, 0x037a, + 0x0386, 0x0386, + 0x0388, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03ce, + 0x03d0, 0x03f5, + 0x03f7, 0x03fb, + 0x0400, 0x0481, + 0x0483, 0x0486, + 0x0488, 0x04ce, + 0x04d0, 0x04f5, + 0x04f8, 0x04f9, + 0x0500, 0x050f, + 0x0531, 0x0556, + 0x0559, 0x0559, + 0x0561, 0x0587, + 0x0591, 0x05a1, + 0x05a3, 0x05b9, + 0x05bb, 0x05bd, + 0x05bf, 0x05bf, + 0x05c1, 0x05c2, + 0x05c4, 0x05c4, + 0x05d0, 0x05ea, + 0x05f0, 0x05f2, + 0x0610, 0x0615, + 0x0621, 0x063a, + 0x0640, 0x0658, + 0x0660, 0x0669, + 0x066e, 0x06d3, + 0x06d5, 0x06dc, + 0x06de, 0x06e8, + 0x06ea, 0x06fc, + 0x06ff, 0x06ff, + 0x0710, 0x074a, + 0x074d, 0x074f, + 0x0780, 0x07b1, + 0x0901, 0x0939, + 0x093c, 0x094d, + 0x0950, 0x0954, + 0x0958, 0x0963, + 0x0966, 0x096f, + 0x0981, 0x0983, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bc, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09cd, + 0x09d7, 0x09d7, + 0x09dc, 0x09dd, + 0x09df, 0x09e3, + 0x09e6, 0x09f1, + 0x09f4, 0x09f9, + 0x0a01, 0x0a03, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a3c, 0x0a3c, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a66, 0x0a74, + 0x0a81, 0x0a83, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abc, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae3, + 0x0ae6, 0x0aef, + 0x0b01, 0x0b03, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 
0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3c, 0x0b43, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b66, 0x0b6f, + 0x0b71, 0x0b71, + 0x0b82, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb5, + 0x0bb7, 0x0bb9, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcd, + 0x0bd7, 0x0bd7, + 0x0be7, 0x0bf2, + 0x0c01, 0x0c03, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3e, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c60, 0x0c61, + 0x0c66, 0x0c6f, + 0x0c82, 0x0c83, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbc, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0ce6, 0x0cef, + 0x0d02, 0x0d03, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3e, 0x0d43, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4d, + 0x0d57, 0x0d57, + 0x0d60, 0x0d61, + 0x0d66, 0x0d6f, + 0x0d82, 0x0d83, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0dca, 0x0dca, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df3, + 0x0e01, 0x0e3a, + 0x0e40, 0x0e4e, + 0x0e50, 0x0e59, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb9, + 0x0ebb, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0ec8, 0x0ecd, + 0x0ed0, 0x0ed9, + 0x0edc, 0x0edd, + 0x0f00, 0x0f00, + 0x0f18, 0x0f19, + 0x0f20, 0x0f33, + 0x0f35, 0x0f35, + 0x0f37, 0x0f37, + 0x0f39, 0x0f39, + 0x0f3e, 0x0f47, + 0x0f49, 0x0f6a, + 0x0f71, 0x0f84, + 0x0f86, 0x0f8b, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fc6, 0x0fc6, + 0x1000, 0x1021, + 0x1023, 0x1027, + 0x1029, 0x102a, + 0x102c, 0x1032, + 
0x1036, 0x1039, + 0x1040, 0x1049, + 0x1050, 0x1059, + 0x10a0, 0x10c5, + 0x10d0, 0x10f8, + 0x1100, 0x1159, + 0x115f, 0x11a2, + 0x11a8, 0x11f9, + 0x1200, 0x1206, + 0x1208, 0x1246, + 0x1248, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1286, + 0x1288, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12ae, + 0x12b0, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12ce, + 0x12d0, 0x12d6, + 0x12d8, 0x12ee, + 0x12f0, 0x130e, + 0x1310, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x131e, + 0x1320, 0x1346, + 0x1348, 0x135a, + 0x1369, 0x137c, + 0x13a0, 0x13f4, + 0x1401, 0x166c, + 0x166f, 0x1676, + 0x1681, 0x169a, + 0x16a0, 0x16ea, + 0x16ee, 0x16f0, + 0x1700, 0x170c, + 0x170e, 0x1714, + 0x1720, 0x1734, + 0x1740, 0x1753, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1772, 0x1773, + 0x1780, 0x17b3, + 0x17b6, 0x17d3, + 0x17d7, 0x17d7, + 0x17dc, 0x17dd, + 0x17e0, 0x17e9, + 0x17f0, 0x17f9, + 0x180b, 0x180d, + 0x1810, 0x1819, + 0x1820, 0x1877, + 0x1880, 0x18a9, + 0x1900, 0x191c, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x1946, 0x196d, + 0x1970, 0x1974, + 0x1d00, 0x1d6b, + 0x1e00, 0x1e9b, + 0x1ea0, 0x1ef9, + 0x1f00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fbc, + 0x1fbe, 0x1fbe, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fcc, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fe0, 0x1fec, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffc, + 0x203f, 0x2040, + 0x2054, 0x2054, + 0x2070, 0x2071, + 0x2074, 0x2079, + 0x207f, 0x2089, + 0x20d0, 0x20ea, + 0x2102, 0x2102, + 0x2107, 0x2107, + 0x210a, 0x2113, + 0x2115, 0x2115, + 0x2119, 0x211d, + 0x2124, 0x2124, + 0x2126, 0x2126, + 0x2128, 0x2128, + 0x212a, 0x212d, + 0x212f, 0x2131, + 0x2133, 0x2139, + 0x213d, 0x213f, + 0x2145, 0x2149, + 0x2153, 0x2183, + 0x2460, 0x249b, + 0x24ea, 0x24ff, + 0x2776, 0x2793, + 0x3005, 0x3007, + 0x3021, 0x302f, + 0x3031, 0x3035, + 0x3038, 0x303c, + 
0x3041, 0x3096, + 0x3099, 0x309a, + 0x309d, 0x309f, + 0x30a1, 0x30ff, + 0x3105, 0x312c, + 0x3131, 0x318e, + 0x3192, 0x3195, + 0x31a0, 0x31b7, + 0x31f0, 0x31ff, + 0x3220, 0x3229, + 0x3251, 0x325f, + 0x3280, 0x3289, + 0x32b1, 0x32bf, + 0x3400, 0x4db5, + 0x4e00, 0x9fa5, + 0xa000, 0xa48c, + 0xac00, 0xd7a3, + 0xf900, 0xfa2d, + 0xfa30, 0xfa6a, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb28, + 0xfb2a, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3d, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfb, + 0xfe00, 0xfe0f, + 0xfe20, 0xfe23, + 0xfe33, 0xfe34, + 0xfe4d, 0xfe4f, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xff10, 0xff19, + 0xff21, 0xff3a, + 0xff3f, 0xff3f, + 0xff41, 0xff5a, + 0xff65, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10107, 0x10133, + 0x10300, 0x1031e, + 0x10320, 0x10323, + 0x10330, 0x1034a, + 0x10380, 0x1039d, + 0x10400, 0x1049d, + 0x104a0, 0x104a9, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x1083f, + 0x1d165, 0x1d169, + 0x1d16d, 0x1d172, + 0x1d17b, 0x1d182, + 0x1d185, 0x1d18b, + 0x1d1aa, 0x1d1ad, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a3, + 0x1d6a8, 0x1d6c0, + 0x1d6c2, 0x1d6da, + 0x1d6dc, 0x1d6fa, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d734, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d76e, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d7a8, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7c9, + 0x1d7ce, 0x1d7ff, + 0x20000, 0x2a6d6, + 0x2f800, 
0x2fa1d, + 0xe0100, 0xe01ef +#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ +}; /* end of MBWord */ + + +static int +utf8_get_ctype_code_range(int ctype, + OnigCodePoint* sbr[], OnigCodePoint* mbr[]) +{ +#define CR_SET(sbl,mbl) do { \ + *sbr = sbl; \ + *mbr = mbl; \ +} while (0) + +#define CR_SB_SET(sbl) do { \ + *sbr = sbl; \ + *mbr = EmptyRange; \ +} while (0) + + switch (ctype) { + case ONIGENC_CTYPE_ALPHA: + CR_SET(SBAlpha, MBAlpha); + break; + case ONIGENC_CTYPE_BLANK: + CR_SET(SBBlank, MBBlank); + break; + case ONIGENC_CTYPE_CNTRL: + CR_SET(SBCntrl, MBCntrl); + break; + case ONIGENC_CTYPE_DIGIT: + CR_SET(SBDigit, MBDigit); + break; + case ONIGENC_CTYPE_GRAPH: + CR_SET(SBGraph, MBGraph); + break; + case ONIGENC_CTYPE_LOWER: + CR_SET(SBLower, MBLower); + break; + case ONIGENC_CTYPE_PRINT: + CR_SET(SBPrint, MBPrint); + break; + case ONIGENC_CTYPE_PUNCT: + CR_SET(SBPunct, MBPunct); + break; + case ONIGENC_CTYPE_SPACE: + CR_SET(SBSpace, MBSpace); + break; + case ONIGENC_CTYPE_UPPER: + CR_SET(SBUpper, MBUpper); + break; + case ONIGENC_CTYPE_XDIGIT: + CR_SB_SET(SBXDigit); + break; + case ONIGENC_CTYPE_WORD: + CR_SET(SBWord, MBWord); + break; + case ONIGENC_CTYPE_ASCII: + CR_SB_SET(SBASCII); + break; + case ONIGENC_CTYPE_ALNUM: + CR_SET(SBAlnum, MBAlnum); + break; + + default: + return ONIGENCERR_TYPE_BUG; + break; + } + + return 0; +} + +static int +utf8_is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + OnigCodePoint *range; +#endif + + if (code < 256) { + return ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code, ctype); + } + +#ifdef USE_UNICODE_FULL_RANGE_CTYPE + + switch (ctype) { + case ONIGENC_CTYPE_ALPHA: + range = MBAlpha; + break; + case ONIGENC_CTYPE_BLANK: + range = MBBlank; + break; + case ONIGENC_CTYPE_CNTRL: + range = MBCntrl; + break; + case ONIGENC_CTYPE_DIGIT: + range = MBDigit; + break; + case ONIGENC_CTYPE_GRAPH: + range = MBGraph; + break; + case ONIGENC_CTYPE_LOWER: + range = MBLower; + break; + case 
ONIGENC_CTYPE_PRINT: + range = MBPrint; + break; + case ONIGENC_CTYPE_PUNCT: + range = MBPunct; + break; + case ONIGENC_CTYPE_SPACE: + range = MBSpace; + break; + case ONIGENC_CTYPE_UPPER: + range = MBUpper; + break; + case ONIGENC_CTYPE_XDIGIT: + return FALSE; + break; + case ONIGENC_CTYPE_WORD: + range = MBWord; + break; + case ONIGENC_CTYPE_ASCII: + return FALSE; + break; + case ONIGENC_CTYPE_ALNUM: + range = MBAlnum; + break; + + default: + return ONIGENCERR_TYPE_BUG; + break; + } + + return onig_is_in_code_range((UChar* )range, code); + +#else + + if ((ctype & ONIGENC_CTYPE_WORD) != 0) { +#ifdef USE_INVALID_CODE_SCHEME + if (code <= VALID_CODE_LIMIT) +#endif + return TRUE; + } +#endif /* USE_UNICODE_FULL_RANGE_CTYPE */ + + return FALSE; +} + +static UChar* +utf8_left_adjust_char_head(const UChar* start, const UChar* s) +{ + const UChar *p; + + if (s <= start) return (UChar* )s; + p = s; + + while (!utf8_islead(*p) && p > start) p--; + return (UChar* )p; +} + +OnigEncodingType OnigEncodingUTF8 = { + utf8_mbc_enc_len, + "UTF-8", /* name */ + 6, /* max byte length */ + 1, /* min byte length */ + (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | + ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | + ONIGENC_AMBIGUOUS_MATCH_COMPOUND), + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' 
*/ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + }, + onigenc_is_mbc_newline_0x0a, + utf8_mbc_to_code, + utf8_code_to_mbclen, + utf8_code_to_mbc, + utf8_mbc_to_normalize, + utf8_is_mbc_ambiguous, + onigenc_iso_8859_1_get_all_pair_ambig_codes, + onigenc_ess_tsett_get_all_comp_ambig_codes, + utf8_is_code_ctype, + utf8_get_ctype_code_range, + utf8_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; +/********************************************************************** + + util.c - + + $Author: matz $ + $Date: 2004/09/21 03:08:31 $ + created at: Fri Mar 10 17:22:34 JST 1995 + + Copyright (C) 1993-2003 Yukihiro Matsumoto + +**********************************************************************/ + +#include "ruby.h" + +#include <ctype.h> +#include <stdio.h> +#include <errno.h> + +#ifdef _WIN32 +#include "missing/file.h" +#endif + +#include "util.h" +#ifndef HAVE_STRING_H +char *strchr _((char*,char)); +#endif + +unsigned long +scan_oct(start, len, retlen) + const char *start; + int len; + int *retlen; +{ + register const char *s = start; + register unsigned long retval = 0; + + while (len-- && *s >= '0' && *s <= '7') { + retval <<= 3; + retval |= *s++ - '0'; + } + *retlen = s - start; + return retval; +} + +unsigned long +scan_hex(start, len, retlen) + const char *start; + int len; + int *retlen; +{ + static char hexdigit[] = "0123456789abcdef0123456789ABCDEF"; + register const char *s = start; + register unsigned long retval = 0; + char *tmp; + + while (len-- && *s && (tmp = strchr(hexdigit, *s))) { + retval <<= 4; + retval |= (tmp - hexdigit) & 15; + s++; + } + *retlen = s - start; + return retval; +} + +#include <sys/types.h> +#include <sys/stat.h> +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#if defined(HAVE_FCNTL_H) +#include <fcntl.h> +#endif + +#ifndef S_ISDIR +# define S_ISDIR(m) ((m & S_IFMT) == S_IFDIR) +#endif + +#if defined(MSDOS) 
|| defined(__CYGWIN32__) || defined(_WIN32) +/* + * Copyright (c) 1993, Intergraph Corporation + * + * You may distribute under the terms of either the GNU General Public + * License or the Artistic License, as specified in the perl README file. + * + * Various Unix compatibility functions and NT specific functions. + * + * Some of this code was derived from the MSDOS port(s) and the OS/2 port. + * + */ + + +/* + * Suffix appending for in-place editing under MS-DOS and OS/2 (and now NT!). + * + * Here are the rules: + * + * Style 0: Append the suffix exactly as standard perl would do it. + * If the filesystem groks it, use it. (HPFS will always + * grok it. So will NTFS. FAT will rarely accept it.) + * + * Style 1: The suffix begins with a '.'. The extension is replaced. + * If the name matches the original name, use the fallback method. + * + * Style 2: The suffix is a single character, not a '.'. Try to add the + * suffix to the following places, using the first one that works. + * [1] Append to extension. + * [2] Append to filename, + * [3] Replace end of extension, + * [4] Replace end of filename. + * If the name matches the original name, use the fallback method. + * + * Style 3: Any other case: Ignore the suffix completely and use the + * fallback method. + * + * Fallback method: Change the extension to ".$$$". If that matches the + * original name, then change the extension to ".~~~". + * + * If filename is more than 1000 characters long, we die a horrible + * death. Sorry. + * + * The filename restriction is a cheat so that we can use buf[] to store + * assorted temporary goo. + * + * Examples, assuming style 0 failed. 
+ * + * suffix = ".bak" (style 1) + * foo.bar => foo.bak + * foo.bak => foo.$$$ (fallback) + * foo.$$$ => foo.~~~ (fallback) + * makefile => makefile.bak + * + * suffix = "~" (style 2) + * foo.c => foo.c~ + * foo.c~ => foo.c~~ + * foo.c~~ => foo~.c~~ + * foo~.c~~ => foo~~.c~~ + * foo~~~~~.c~~ => foo~~~~~.$$$ (fallback) + * + * foo.pas => foo~.pas + * makefile => makefile.~ + * longname.fil => longname.fi~ + * longname.fi~ => longnam~.fi~ + * longnam~.fi~ => longnam~.$$$ + * + */ + + +static int valid_filename(char *s); + +static char suffix1[] = ".$$$"; +static char suffix2[] = ".~~~"; + +#define ext (&buf[1000]) + +#define strEQ(s1,s2) (strcmp(s1,s2) == 0) + +void +ruby_add_suffix(str, suffix) + VALUE str; + char *suffix; +{ + int baselen; + int extlen = strlen(suffix); + char *s, *t, *p; + long slen; + char buf[1024]; + + if (RSTRING(str)->len > 1000) + rb_fatal("Cannot do inplace edit on long filename (%ld characters)", + RSTRING(str)->len); + +#if defined(DJGPP) || defined(__CYGWIN32__) || defined(_WIN32) + /* Style 0 */ + slen = RSTRING(str)->len; + rb_str_cat(str, suffix, extlen); +#if defined(DJGPP) + if (_USE_LFN) return; +#else + if (valid_filename(RSTRING(str)->ptr)) return; +#endif + + /* Fooey, style 0 failed. Fix str before continuing. 
*/ + RSTRING(str)->ptr[RSTRING(str)->len = slen] = '\0'; +#endif + + slen = extlen; + t = buf; baselen = 0; s = RSTRING(str)->ptr; + while ((*t = *s) && *s != '.') { + baselen++; + if (*s == '\\' || *s == '/') baselen = 0; + s++; t++; + } + p = t; + + t = ext; extlen = 0; + while (*t++ = *s++) extlen++; + if (extlen == 0) { ext[0] = '.'; ext[1] = 0; extlen++; } + + if (*suffix == '.') { /* Style 1 */ + if (strEQ(ext, suffix)) goto fallback; + strcpy(p, suffix); + } + else if (suffix[1] == '\0') { /* Style 2 */ + if (extlen < 4) { + ext[extlen] = *suffix; + ext[++extlen] = '\0'; + } + else if (baselen < 8) { + *p++ = *suffix; + } + else if (ext[3] != *suffix) { + ext[3] = *suffix; + } + else if (buf[7] != *suffix) { + buf[7] = *suffix; + } + else goto fallback; + strcpy(p, ext); + } + else { /* Style 3: Panic */ +fallback: + (void)memcpy(p, strEQ(ext, suffix1) ? suffix2 : suffix1, 5); + } + rb_str_resize(str, strlen(buf)); + memcpy(RSTRING(str)->ptr, buf, RSTRING(str)->len); +} + +#if defined(__CYGWIN32__) || defined(_WIN32) +static int +valid_filename(char *s) +{ + int fd; + + /* + // if the file exists, then it's a valid filename! + */ + + if (_access(s, 0) == 0) { + return 1; + } + + /* + // It doesn't exist, so see if we can open it. 
+ */ + + if ((fd = _open(s, O_CREAT, 0666)) >= 0) { + _close(fd); + _unlink(s); /* don't leave it laying around */ + return 1; + } + return 0; +} +#endif +#endif + +#if defined __DJGPP__ + +#include <dpmi.h> + +static char dbcs_table[256]; + +int +make_dbcs_table() +{ + __dpmi_regs r; + struct { + unsigned char start; + unsigned char end; + } vec; + int offset; + + memset(&r, 0, sizeof(r)); + r.x.ax = 0x6300; + __dpmi_int(0x21, &r); + offset = r.x.ds * 16 + r.x.si; + + for (;;) { + int i; + dosmemget(offset, sizeof vec, &vec); + if (!vec.start && !vec.end) + break; + for (i = vec.start; i <= vec.end; i++) + dbcs_table[i] = 1; + offset += 2; + } +} + +int +mblen(const char *s, size_t n) +{ + static int need_init = 1; + if (need_init) { + make_dbcs_table(); + need_init = 0; + } + if (s) { + if (n == 0 || *s == 0) + return 0; + else if (!s[1]) + return 1; + return dbcs_table[(unsigned char)*s] + 1; + } + else + return 1; +} + +struct PathList { + struct PathList *next; + char *path; +}; + +struct PathInfo { + struct PathList *head; + int count; +}; + +static void +push_element(const char *path, VALUE vinfo) +{ + struct PathList *p; + struct PathInfo *info = (struct PathInfo *)vinfo; + + p = ALLOC(struct PathList); + MEMZERO(p, struct PathList, 1); + p->path = ruby_strdup(path); + p->next = info->head; + info->head = p; + info->count++; +} + +#include <dirent.h> +int __opendir_flags = __OPENDIR_PRESERVE_CASE; + +char ** +__crt0_glob_function(char *path) +{ + int len = strlen(path); + int i; + char **rv; + char path_buffer[PATH_MAX]; + char *buf = path_buffer; + char *p; + struct PathInfo info; + struct PathList *plist; + + if (PATH_MAX <= len) + buf = ruby_xmalloc(len + 1); + + strncpy(buf, path, len); + buf[len] = '\0'; + + for (p = buf; *p; p += mblen(p, RUBY_MBCHAR_MAXSIZE)) + if (*p == '\\') + *p = '/'; + + info.count = 0; + info.head = 0; + + rb_glob(buf, push_element, (VALUE)&info); + + if (buf != path_buffer) + ruby_xfree(buf); + + if (info.count == 0) + return 
0; + + rv = ruby_xmalloc((info.count + 1) * sizeof (char *)); + + plist = info.head; + i = 0; + while (plist) { + struct PathList *cur; + rv[i] = plist->path; + cur = plist; + plist = plist->next; + ruby_xfree(cur); + i++; + } + rv[i] = 0; + return rv; +} + +#endif + +/* mm.c */ + +#define A ((int*)a) +#define B ((int*)b) +#define C ((int*)c) +#define D ((int*)d) + +#define mmprepare(base, size) do {\ + if (((long)base & (0x3)) == 0)\ + if (size >= 16) mmkind = 1;\ + else mmkind = 0;\ + else mmkind = -1;\ + high = (size & (~0xf));\ + low = (size & 0x0c);\ +} while (0)\ + +#define mmarg mmkind, size, high, low + +static void mmswap_(a, b, mmarg) + register char *a, *b; + int mmarg; +{ + register int s; + if (a == b) return; + if (mmkind >= 0) { + if (mmkind > 0) { + register char *t = a + high; + do { + s = A[0]; A[0] = B[0]; B[0] = s; + s = A[1]; A[1] = B[1]; B[1] = s; + s = A[2]; A[2] = B[2]; B[2] = s; + s = A[3]; A[3] = B[3]; B[3] = s; a += 16; b += 16; + } while (a < t); + } + if (low != 0) { s = A[0]; A[0] = B[0]; B[0] = s; + if (low >= 8) { s = A[1]; A[1] = B[1]; B[1] = s; + if (low == 12) {s = A[2]; A[2] = B[2]; B[2] = s;}}} + } + else { + register char *t = a + size; + do {s = *a; *a++ = *b; *b++ = s;} while (a < t); + } +} +#define mmswap(a,b) mmswap_((a),(b),mmarg) + +static void mmrot3_(a, b, c, mmarg) + register char *a, *b, *c; + int mmarg; +{ + register int s; + if (mmkind >= 0) { + if (mmkind > 0) { + register char *t = a + high; + do { + s = A[0]; A[0] = B[0]; B[0] = C[0]; C[0] = s; + s = A[1]; A[1] = B[1]; B[1] = C[1]; C[1] = s; + s = A[2]; A[2] = B[2]; B[2] = C[2]; C[2] = s; + s = A[3]; A[3] = B[3]; B[3] = C[3]; C[3] = s; a += 16; b += 16; c += 16; + } while (a < t); + } + if (low != 0) { s = A[0]; A[0] = B[0]; B[0] = C[0]; C[0] = s; + if (low >= 8) { s = A[1]; A[1] = B[1]; B[1] = C[1]; C[1] = s; + if (low == 12) {s = A[2]; A[2] = B[2]; B[2] = C[2]; C[2] = s;}}} + } + else { + register char *t = a + size; + do {s = *a; *a++ = *b; *b++ = *c; *c++ = 
s;} while (a < t); + } +} +#define mmrot3(a,b,c) mmrot3_((a),(b),(c),mmarg) + +/* qs6.c */ +/*****************************************************/ +/* */ +/* qs6 (Quick sort function) */ +/* */ +/* by Tomoyuki Kawamura 1995.4.21 */ +/* kawamura@tokuyama.ac.jp */ +/*****************************************************/ + +typedef struct { char *LL, *RR; } stack_node; /* Stack structure for L,l,R,r */ +#define PUSH(ll,rr) do { top->LL = (ll); top->RR = (rr); ++top; } while (0) /* Push L,l,R,r */ +#define POP(ll,rr) do { --top; ll = top->LL; rr = top->RR; } while (0) /* Pop L,l,R,r */ + +#define med3(a,b,c) ((*cmp)(a,b,d)<0 ? \ + ((*cmp)(b,c,d)<0 ? b : ((*cmp)(a,c,d)<0 ? c : a)) : \ + ((*cmp)(b,c,d)>0 ? b : ((*cmp)(a,c,d)<0 ? a : c))) + +void ruby_qsort (base, nel, size, cmp, d) + void* base; + const int nel; + const int size; + int (*cmp)(); + void *d; +{ + register char *l, *r, *m; /* l,r:left,right group m:median point */ + register int t, eq_l, eq_r; /* eq_l: all items in left group are equal to S */ + char *L = base; /* left end of curren region */ + char *R = (char*)base + size*(nel-1); /* right end of current region */ + int chklim = 63; /* threshold of ordering element check */ + stack_node stack[32], *top = stack; /* 32 is enough for 32bit CPU */ + int mmkind, high, low; + + if (nel <= 1) return; /* need not to sort */ + mmprepare(base, size); + goto start; + + nxt: + if (stack == top) return; /* return if stack is empty */ + POP(L,R); + + for (;;) { + start: + if (L + size == R) { /* 2 elements */ + if ((*cmp)(L,R,d) > 0) mmswap(L,R); goto nxt; + } + + l = L; r = R; + t = (r - l + size) / size; /* number of elements */ + m = l + size * (t >> 1); /* calculate median value */ + + if (t >= 60) { + register char *m1; + register char *m3; + if (t >= 200) { + t = size*(t>>3); /* number of bytes in splitting 8 */ + { + register char *p1 = l + t; + register char *p2 = p1 + t; + register char *p3 = p2 + t; + m1 = med3(p1, p2, p3); + p1 = m + t; + p2 = p1 + t; + p3 = 
p2 + t; + m3 = med3(p1, p2, p3); + } + } + else { + t = size*(t>>2); /* number of bytes in splitting 4 */ + m1 = l + t; + m3 = m + t; + } + m = med3(m1, m, m3); + } + + if ((t = (*cmp)(l,m,d)) < 0) { /*3-5-?*/ + if ((t = (*cmp)(m,r,d)) < 0) { /*3-5-7*/ + if (chklim && nel >= chklim) { /* check if already ascending order */ + char *p; + chklim = 0; + for (p=l; p<r; p+=size) if ((*cmp)(p,p+size,d) > 0) goto fail; + goto nxt; + } + fail: goto loopA; /*3-5-7*/ + } + if (t > 0) { + if ((*cmp)(l,r,d) <= 0) {mmswap(m,r); goto loopA;} /*3-5-4*/ + mmrot3(r,m,l); goto loopA; /*3-5-2*/ + } + goto loopB; /*3-5-5*/ + } + + if (t > 0) { /*7-5-?*/ + if ((t = (*cmp)(m,r,d)) > 0) { /*7-5-3*/ + if (chklim && nel >= chklim) { /* check if already ascending order */ + char *p; + chklim = 0; + for (p=l; p<r; p+=size) if ((*cmp)(p,p+size,d) < 0) goto fail2; + while (l<r) {mmswap(l,r); l+=size; r-=size;} /* reverse region */ + goto nxt; + } + fail2: mmswap(l,r); goto loopA; /*7-5-3*/ + } + if (t < 0) { + if ((*cmp)(l,r,d) <= 0) {mmswap(l,m); goto loopB;} /*7-5-8*/ + mmrot3(l,m,r); goto loopA; /*7-5-6*/ + } + mmswap(l,r); goto loopA; /*7-5-5*/ + } + + if ((t = (*cmp)(m,r,d)) < 0) {goto loopA;} /*5-5-7*/ + if (t > 0) {mmswap(l,r); goto loopB;} /*5-5-3*/ + + /* determining splitting type in case 5-5-5 */ /*5-5-5*/ + for (;;) { + if ((l += size) == r) goto nxt; /*5-5-5*/ + if (l == m) continue; + if ((t = (*cmp)(l,m,d)) > 0) {mmswap(l,r); l = L; goto loopA;}/*575-5*/ + if (t < 0) {mmswap(L,l); l = L; goto loopB;} /*535-5*/ + } + + loopA: eq_l = 1; eq_r = 1; /* splitting type A */ /* left <= median < right */ + for (;;) { + for (;;) { + if ((l += size) == r) + {l -= size; if (l != m) mmswap(m,l); l -= size; goto fin;} + if (l == m) continue; + if ((t = (*cmp)(l,m,d)) > 0) {eq_r = 0; break;} + if (t < 0) eq_l = 0; + } + for (;;) { + if (l == (r -= size)) + {l -= size; if (l != m) mmswap(m,l); l -= size; goto fin;} + if (r == m) {m = l; break;} + if ((t = (*cmp)(r,m,d)) < 0) {eq_l = 0; break;} + 
if (t == 0) break; + } + mmswap(l,r); /* swap left and right */ + } + + loopB: eq_l = 1; eq_r = 1; /* splitting type B */ /* left < median <= right */ + for (;;) { + for (;;) { + if (l == (r -= size)) + {r += size; if (r != m) mmswap(r,m); r += size; goto fin;} + if (r == m) continue; + if ((t = (*cmp)(r,m,d)) < 0) {eq_l = 0; break;} + if (t > 0) eq_r = 0; + } + for (;;) { + if ((l += size) == r) + {r += size; if (r != m) mmswap(r,m); r += size; goto fin;} + if (l == m) {m = r; break;} + if ((t = (*cmp)(l,m,d)) > 0) {eq_r = 0; break;} + if (t == 0) break; + } + mmswap(l,r); /* swap left and right */ + } + + fin: + if (eq_l == 0) /* need to sort left side */ + if (eq_r == 0) /* need to sort right side */ + if (l-L < R-r) {PUSH(r,R); R = l;} /* sort left side first */ + else {PUSH(L,l); L = r;} /* sort right side first */ + else R = l; /* need to sort left side only */ + else if (eq_r == 0) L = r; /* need to sort right side only */ + else goto nxt; /* need not to sort both sides */ + } +} + +char * +ruby_strdup(str) + const char *str; +{ + char *tmp; + int len = strlen(str) + 1; + + tmp = xmalloc(len); + memcpy(tmp, str, len); + + return tmp; +} + +char * +ruby_getcwd() +{ +#ifdef HAVE_GETCWD + int size = 200; + char *buf = xmalloc(size); + + while (!getcwd(buf, size)) { + if (errno != ERANGE) { + free(buf); + rb_sys_fail("getcwd"); + } + size *= 2; + buf = xrealloc(buf, size); + } +#else +# ifndef PATH_MAX +# define PATH_MAX 8192 +# endif + char *buf = xmalloc(PATH_MAX+1); + + if (!getwd(buf)) { + free(buf); + rb_sys_fail("getwd"); + } +#endif + return buf; +} + +/* copyright notice for strtod implementation -- + * + * Copyright (c) 1988-1993 The Regents of the University of California. + * Copyright (c) 1994 Sun Microsystems, Inc. + * + * Permission to use, copy, modify, and distribute this + * software and its documentation for any purpose and without + * fee is hereby granted, provided that the above copyright + * notice appear in all copies. 
The University of California + * makes no representations about the suitability of this + * software for any purpose. It is provided "as is" without + * express or implied warranty. + * + */ + +#define TRUE 1 +#define FALSE 0 + +static int MDMINEXPT = -323; +static int MDMAXEXPT = 309; +static double powersOf10[] = { /* Table giving binary powers of 10. Entry */ + 10.0, /* is 10^2^i. Used to convert decimal */ + 100.0, /* exponents into floating-point numbers. */ + 1.0e4, + 1.0e8, + 1.0e16, + 1.0e32, + 1.0e64, + 1.0e128, + 1.0e256 +}; + +/* + *---------------------------------------------------------------------- + * + * strtod -- + * + * This procedure converts a floating-point number from an ASCII + * decimal representation to internal double-precision format. + * + * Results: + * The return value is the double-precision floating-point + * representation of the characters in string. If endPtr isn't + * NULL, then *endPtr is filled in with the address of the + * next character after the last one that was part of the + * floating-point number. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +double +ruby_strtod(string, endPtr) + const char *string; /* A decimal ASCII floating-point number, + * optionally preceded by white space. + * Must have form "-I.FE-X", where I is the + * integer part of the mantissa, F is the + * fractional part of the mantissa, and X + * is the exponent. Either of the signs + * may be "+", "-", or omitted. Either I + * or F may be omitted, but both cannot be + * ommitted at once. The decimal + * point isn't necessary unless F is present. + * The "E" may actually be an "e". E and X + * may both be omitted (but not just one). + */ + char **endPtr; /* If non-NULL, store terminating character's + * address here. */ +{ + int sign, expSign = FALSE; + double fraction, dblExp, *d; + register const char *p; + register int c; + int exp = 0; /* Exponent read from "EX" field. 
*/ + int fracExp = 0; /* Exponent that derives from the fractional + * part. Under normal circumstatnces, it is + * the negative of the number of digits in F. + * However, if I is very long, the last digits + * of I get dropped (otherwise a long I with a + * large negative exponent could cause an + * unnecessary overflow on I alone). In this + * case, fracExp is incremented one for each + * dropped digit. */ + int mantSize = 0; /* Number of digits in mantissa. */ + int hasPoint = FALSE; /* Decimal point exists. */ + int hasDigit = FALSE; /* I or F exists. */ + const char *pMant; /* Temporarily holds location of mantissa + * in string. */ + const char *pExp; /* Temporarily holds location of exponent + * in string. */ + + /* + * Strip off leading blanks and check for a sign. + */ + + errno = 0; + p = string; + while (ISSPACE(*p)) { + p += 1; + } + if (*p == '-') { + sign = TRUE; + p += 1; + } + else { + if (*p == '+') { + p += 1; + } + sign = FALSE; + } + + /* + * Count the number of digits in the mantissa + * and also locate the decimal point. + */ + + for ( ; c = *p; p += 1) { + if (!ISDIGIT(c)) { + if (c != '.' || hasPoint) { + break; + } + hasPoint = TRUE; + } + else { + if (hasPoint) { /* already in fractional part */ + fracExp -= 1; + } + if (mantSize) { /* already in mantissa */ + mantSize += 1; + } + else if (c != '0') { /* have entered mantissa */ + mantSize += 1; + pMant = p; + } + hasDigit = TRUE; + } + } + + /* + * Now suck up the digits in the mantissa. Use two integers to + * collect 9 digits each (this is faster than using floating-point). + * If the mantissa has more than 18 digits, ignore the extras, since + * they can't affect the value anyway. 
+ */ + + pExp = p; + if (mantSize) { + p = pMant; + } + if (mantSize > 18) { + fracExp += (mantSize - 18); + mantSize = 18; + } + if (!hasDigit) { + fraction = 0.0; + p = string; + } + else { + int frac1, frac2; + frac1 = 0; + for ( ; mantSize > 9; mantSize -= 1) { + c = *p; + p += 1; + if (c == '.') { + c = *p; + p += 1; + } + frac1 = 10*frac1 + (c - '0'); + } + frac2 = 0; + for (; mantSize > 0; mantSize -= 1) { + c = *p; + p += 1; + if (c == '.') { + c = *p; + p += 1; + } + frac2 = 10*frac2 + (c - '0'); + } + + /* + * Skim off the exponent. + */ + + p = pExp; + if ((*p == 'E') || (*p == 'e')) { + p += 1; + if (*p == '-') { + expSign = TRUE; + p += 1; + } + else { + if (*p == '+') { + p += 1; + } + expSign = FALSE; + } + while (ISDIGIT(*p)) { + exp = exp * 10 + (*p - '0'); + p += 1; + } + } + if (expSign) { + exp = fracExp - exp; + } + else { + exp = fracExp + exp; + } + + /* + * Generate a floating-point number that represents the exponent. + * Do this by processing the exponent one bit at a time to combine + * many powers of 2 of 10. Then combine the exponent with the + * fraction. 
+ */ + + if (exp >= MDMAXEXPT - 18) { + exp = MDMAXEXPT; + errno = ERANGE; + } + else if (exp < MDMINEXPT + 18) { + exp = MDMINEXPT; + errno = ERANGE; + } + fracExp = exp; + exp += 9; + if (exp < 0) { + expSign = TRUE; + exp = -exp; + } + else { + expSign = FALSE; + } + dblExp = 1.0; + for (d = powersOf10; exp != 0; exp >>= 1, d += 1) { + if (exp & 01) { + dblExp *= *d; + } + } + if (expSign) { + fraction = frac1 / dblExp; + } + else { + fraction = frac1 * dblExp; + } + exp = fracExp; + if (exp < 0) { + expSign = TRUE; + exp = -exp; + } + else { + expSign = FALSE; + } + dblExp = 1.0; + for (d = powersOf10; exp != 0; exp >>= 1, d += 1) { + if (exp & 01) { + dblExp *= *d; + } + } + if (expSign) { + fraction += frac2 / dblExp; + } + else { + fraction += frac2 * dblExp; + } + } + + if (endPtr != NULL) { + *endPtr = (char *) p; + } + + if (sign) { + return -fraction; + } + return fraction; +} +/********************************************************************** + + variable.c - + + $Author: matz $ + $Date: 2005/03/04 06:47:41 $ + created at: Tue Apr 19 23:55:15 JST 1994 + + Copyright (C) 1993-2003 Yukihiro Matsumoto + Copyright (C) 2000 Network Applied Communication Laboratory, Inc. 
+ Copyright (C) 2000 Information-technology Promotion Agency, Japan + +**********************************************************************/ + +#include "ruby.h" +#include "env.h" +#include "node.h" +#include "st.h" +#include "util.h" + +static st_table *rb_global_tbl; +st_table *rb_class_tbl; +static ID autoload, classpath, tmp_classpath; + +void +Init_var_tables() +{ + rb_global_tbl = st_init_numtable(); + rb_class_tbl = st_init_numtable(); + autoload = rb_intern("__autoload__"); + classpath = rb_intern("__classpath__"); + tmp_classpath = rb_intern("__tmp_classpath__"); +} + +struct fc_result { + ID name; + VALUE klass; + VALUE path; + VALUE track; + struct fc_result *prev; +}; + +static VALUE +fc_path(fc, name) + struct fc_result *fc; + ID name; +{ + VALUE path, tmp; + + path = rb_str_new2(rb_id2name(name)); + while (fc) { + if (fc->track == rb_cObject) break; + if (ROBJECT(fc->track)->iv_tbl && + st_lookup(ROBJECT(fc->track)->iv_tbl, classpath, &tmp)) { + tmp = rb_str_dup(tmp); + rb_str_cat2(tmp, "::"); + rb_str_append(tmp, path); + + return tmp; + } + tmp = rb_str_new2(rb_id2name(fc->name)); + rb_str_cat2(tmp, "::"); + rb_str_append(tmp, path); + path = tmp; + fc = fc->prev; + } + return path; +} + +static int +fc_i(key, value, res) + ID key; + VALUE value; + struct fc_result *res; +{ + if (!rb_is_const_id(key)) return ST_CONTINUE; + + if (value == res->klass) { + res->path = fc_path(res, key); + return ST_STOP; + } + switch (TYPE(value)) { + case T_MODULE: + case T_CLASS: + if (!RCLASS(value)->iv_tbl) return ST_CONTINUE; + else { + struct fc_result arg; + struct fc_result *list; + + list = res; + while (list) { + if (list->track == value) return ST_CONTINUE; + list = list->prev; + } + + arg.name = key; + arg.path = 0; + arg.klass = res->klass; + arg.track = value; + arg.prev = res; + st_foreach(RCLASS(value)->iv_tbl, fc_i, (st_data_t)&arg); + if (arg.path) { + res->path = arg.path; + return ST_STOP; + } + } + break; + + default: + break; + } + return 
ST_CONTINUE; +} + +static VALUE +find_class_path(klass) + VALUE klass; +{ + struct fc_result arg; + + arg.name = 0; + arg.path = 0; + arg.klass = klass; + arg.track = rb_cObject; + arg.prev = 0; + if (RCLASS(rb_cObject)->iv_tbl) { + st_foreach_safe(RCLASS(rb_cObject)->iv_tbl, fc_i, (st_data_t)&arg); + } + if (arg.path == 0) { + st_foreach_safe(rb_class_tbl, fc_i, (st_data_t)&arg); + } + if (arg.path) { + if (!ROBJECT(klass)->iv_tbl) { + ROBJECT(klass)->iv_tbl = st_init_numtable(); + } + st_insert(ROBJECT(klass)->iv_tbl, classpath, arg.path); + st_delete(RCLASS(klass)->iv_tbl, &tmp_classpath, 0); + return arg.path; + } + return Qnil; +} + +static VALUE +classname(klass) + VALUE klass; +{ + VALUE path = Qnil; + + if (!klass) klass = rb_cObject; + if (ROBJECT(klass)->iv_tbl) { + if (!st_lookup(ROBJECT(klass)->iv_tbl, classpath, &path)) { + ID classid = rb_intern("__classid__"); + + if (!st_lookup(ROBJECT(klass)->iv_tbl, classid, &path)) { + return find_class_path(klass); + } + path = rb_str_new2(rb_id2name(SYM2ID(path))); + st_insert(ROBJECT(klass)->iv_tbl, classpath, path); + st_delete(RCLASS(klass)->iv_tbl, (st_data_t*)&classid, 0); + } + if (TYPE(path) != T_STRING) { + rb_bug("class path is not set properly"); + } + return path; + } + return find_class_path(klass); +} + +/* + * call-seq: + * mod.name => string + * + * Returns the name of the module <i>mod</i>. 
+ */ + +VALUE +rb_mod_name(mod) + VALUE mod; +{ + VALUE path = classname(mod); + + if (!NIL_P(path)) return rb_str_dup(path); + return rb_str_new(0,0); +} + +VALUE +rb_class_path(klass) + VALUE klass; +{ + VALUE path = classname(klass); + + if (!NIL_P(path)) return path; + if (RCLASS(klass)->iv_tbl && st_lookup(RCLASS(klass)->iv_tbl, + tmp_classpath, &path)) { + return path; + } + else { + char *s = "Class"; + + if (TYPE(klass) == T_MODULE) { + if (rb_obj_class(klass) == rb_cModule) { + s = "Module"; + } + else { + s = rb_class2name(RBASIC(klass)->klass); + } + } + path = rb_str_new(0, 2 + strlen(s) + 3 + 2 * SIZEOF_LONG + 1); + sprintf(RSTRING(path)->ptr, "#<%s:0x%lx>", s, klass); + RSTRING(path)->len = strlen(RSTRING(path)->ptr); + rb_ivar_set(klass, tmp_classpath, path); + + return path; + } +} + +void +rb_set_class_path(klass, under, name) + VALUE klass, under; + const char *name; +{ + VALUE str; + + if (under == rb_cObject) { + str = rb_str_new2(name); + } + else { + str = rb_str_dup(rb_class_path(under)); + rb_str_cat2(str, "::"); + rb_str_cat2(str, name); + } + rb_ivar_set(klass, classpath, str); +} + +VALUE +rb_path2class(path) + const char *path; +{ + const char *pbeg, *p; + ID id; + VALUE c = rb_cObject; + + if (path[0] == '#') { + rb_raise(rb_eArgError, "can't retrieve anonymous class %s", path); + } + pbeg = p = path; + while (*p) { + VALUE str; + + while (*p && *p != ':') p++; + str = rb_str_new(pbeg, p-pbeg); + id = rb_intern(RSTRING(str)->ptr); + if (p[0] == ':') { + if (p[1] != ':') goto undefined_class; + p += 2; + pbeg = p; + } + if (!rb_const_defined(c, id)) { + undefined_class: + rb_raise(rb_eArgError, "undefined class/module %.*s", p-path, path); + } + c = rb_const_get_at(c, id); + switch (TYPE(c)) { + case T_MODULE: + case T_CLASS: + break; + default: + rb_raise(rb_eTypeError, "%s does not refer class/module", path); + } + } + + return c; +} + +void +rb_name_class(klass, id) + VALUE klass; + ID id; +{ + rb_iv_set(klass, "__classid__", 
ID2SYM(id)); +} + +VALUE +rb_class_name(klass) + VALUE klass; +{ + return rb_class_path(rb_class_real(klass)); +} + +char * +rb_class2name(klass) + VALUE klass; +{ + return RSTRING(rb_class_name(klass))->ptr; +} + +char * +rb_obj_classname(obj) + VALUE obj; +{ + return rb_class2name(CLASS_OF(obj)); +} + +struct trace_var { + int removed; + void (*func)(); + VALUE data; + struct trace_var *next; +}; + +struct global_variable { + int counter; + void *data; + VALUE (*getter)(); + void (*setter)(); + void (*marker)(); + int block_trace; + struct trace_var *trace; +}; + +struct global_entry { + struct global_variable *var; + ID id; +}; + +static VALUE undef_getter(); +static void undef_setter(); +static void undef_marker(); + +static VALUE val_getter(); +static void val_setter(); +static void val_marker(); + +static VALUE var_getter(); +static void var_setter(); +static void var_marker(); + +struct global_entry* +rb_global_entry(id) + ID id; +{ + struct global_entry *entry; + + if (!st_lookup(rb_global_tbl, id, (st_data_t *)&entry)) { + struct global_variable *var; + entry = ALLOC(struct global_entry); + var = ALLOC(struct global_variable); + entry->id = id; + entry->var = var; + var->counter = 1; + var->data = 0; + var->getter = undef_getter; + var->setter = undef_setter; + var->marker = undef_marker; + + var->block_trace = 0; + var->trace = 0; + st_add_direct(rb_global_tbl, id, (st_data_t)entry); + } + return entry; +} + +static VALUE +undef_getter(id) + ID id; +{ + rb_warning("global variable `%s' not initialized", rb_id2name(id)); + + return Qnil; +} + +static void +undef_setter(val, id, data, var) + VALUE val; + ID id; + void *data; + struct global_variable *var; +{ + var->getter = val_getter; + var->setter = val_setter; + var->marker = val_marker; + + var->data = (void*)val; +} + +static void +undef_marker() +{ +} + +static VALUE +val_getter(id, val) + ID id; + VALUE val; +{ + return val; +} + +static void +val_setter(val, id, data, var) + VALUE val; + ID id; + 
void *data; + struct global_variable *var; +{ + var->data = (void*)val; +} + +static void +val_marker(data) + VALUE data; +{ + if (data) rb_gc_mark_maybe(data); +} + +static VALUE +var_getter(id, var) + ID id; + VALUE *var; +{ + if (!var) return Qnil; + return *var; +} + +static void +var_setter(val, id, var) + VALUE val; + ID id; + VALUE *var; +{ + *var = val; +} + +static void +var_marker(var) + VALUE *var; +{ + if (var) rb_gc_mark_maybe(*var); +} + +static void +readonly_setter(val, id, var) + VALUE val; + ID id; + void *var; +{ + rb_name_error(id, "%s is a read-only variable", rb_id2name(id)); +} + +static int +mark_global_entry(key, entry) + ID key; + struct global_entry *entry; +{ + struct trace_var *trace; + struct global_variable *var = entry->var; + + (*var->marker)(var->data); + trace = var->trace; + while (trace) { + if (trace->data) rb_gc_mark_maybe(trace->data); + trace = trace->next; + } + return ST_CONTINUE; +} + +void +rb_gc_mark_global_tbl() +{ + st_foreach_safe(rb_global_tbl, mark_global_entry, 0); +} + +static ID +global_id(name) + const char *name; +{ + ID id; + + if (name[0] == '$') id = rb_intern(name); + else { + char *buf = ALLOCA_N(char, strlen(name)+2); + buf[0] = '$'; + strcpy(buf+1, name); + id = rb_intern(buf); + } + return id; +} + +void +rb_define_hooked_variable(name, var, getter, setter) + const char *name; + VALUE *var; + VALUE (*getter)(); + void (*setter)(); +{ + struct global_variable *gvar; + ID id = global_id(name); + + gvar = rb_global_entry(id)->var; + gvar->data = (void*)var; + gvar->getter = getter?getter:var_getter; + gvar->setter = setter?setter:var_setter; + gvar->marker = var_marker; +} + +void +rb_define_variable(name, var) + const char *name; + VALUE *var; +{ + rb_define_hooked_variable(name, var, 0, 0); +} + +void +rb_define_readonly_variable(name, var) + const char *name; + VALUE *var; +{ + rb_define_hooked_variable(name, var, 0, readonly_setter); +} + +void +rb_define_virtual_variable(name, getter, setter) + const 
char *name; + VALUE (*getter)(); + void (*setter)(); +{ + if (!getter) getter = val_getter; + if (!setter) setter = readonly_setter; + rb_define_hooked_variable(name, 0, getter, setter); +} + +static void +rb_trace_eval(cmd, val) + VALUE cmd, val; +{ + rb_eval_cmd(cmd, rb_ary_new3(1, val), 0); +} + +/* + * call-seq: + * trace_var(symbol, cmd ) => nil + * trace_var(symbol) {|val| block } => nil + * + * Controls tracing of assignments to global variables. The parameter + * +symbol_ identifies the variable (as either a string name or a + * symbol identifier). _cmd_ (which may be a string or a + * +Proc+ object) or block is executed whenever the variable + * is assigned. The block or +Proc+ object receives the + * variable's new value as a parameter. Also see + * <code>Kernel::untrace_var</code>. + * + * trace_var :$_, proc {|v| puts "$_ is now '#{v}'" } + * $_ = "hello" + * $_ = ' there' + * + * <em>produces:</em> + * + * $_ is now 'hello' + * $_ is now ' there' + */ + +VALUE +rb_f_trace_var(argc, argv) + int argc; + VALUE *argv; +{ + VALUE var, cmd; + struct global_entry *entry; + struct trace_var *trace; + + rb_secure(4); + if (rb_scan_args(argc, argv, "11", &var, &cmd) == 1) { + cmd = rb_block_proc(); + } + if (NIL_P(cmd)) { + return rb_f_untrace_var(argc, argv); + } + entry = rb_global_entry(rb_to_id(var)); + if (OBJ_TAINTED(cmd)) { + rb_raise(rb_eSecurityError, "Insecure: tainted variable trace"); + } + trace = ALLOC(struct trace_var); + trace->next = entry->var->trace; + trace->func = rb_trace_eval; + trace->data = cmd; + trace->removed = 0; + entry->var->trace = trace; + + return Qnil; +} + +static void +remove_trace(var) + struct global_variable *var; +{ + struct trace_var *trace = var->trace; + struct trace_var t; + struct trace_var *next; + + t.next = trace; + trace = &t; + while (trace->next) { + next = trace->next; + if (next->removed) { + trace->next = next->next; + free(next); + } + else { + trace = next; + } + } + var->trace = t.next; +} + +/* + * 
call-seq: + * untrace_var(symbol [, cmd] ) => array or nil + * + * Removes tracing for the specified command on the given global + * variable and returns +nil+. If no command is specified, + * removes all tracing for that variable and returns an array + * containing the commands actually removed. + */ + +VALUE +rb_f_untrace_var(argc, argv) + int argc; + VALUE *argv; +{ + VALUE var, cmd; + ID id; + struct global_entry *entry; + struct trace_var *trace; + + rb_scan_args(argc, argv, "11", &var, &cmd); + id = rb_to_id(var); + if (!st_lookup(rb_global_tbl, id, (st_data_t *)&entry)) { + rb_name_error(id, "undefined global variable %s", rb_id2name(id)); + } + + trace = entry->var->trace; + if (NIL_P(cmd)) { + VALUE ary = rb_ary_new(); + + while (trace) { + struct trace_var *next = trace->next; + rb_ary_push(ary, (VALUE)trace->data); + trace->removed = 1; + trace = next; + } + + if (!entry->var->block_trace) remove_trace(entry->var); + return ary; + } + else { + while (trace) { + if (trace->data == cmd) { + trace->removed = 1; + if (!entry->var->block_trace) remove_trace(entry->var); + return rb_ary_new3(1, cmd); + } + trace = trace->next; + } + } + return Qnil; +} + +VALUE +rb_gvar_get(entry) + struct global_entry *entry; +{ + struct global_variable *var = entry->var; + return (*var->getter)(entry->id, var->data, var); +} + +struct trace_data { + struct trace_var *trace; + VALUE val; +}; + +static VALUE +trace_ev(data) + struct trace_data *data; +{ + struct trace_var *trace = data->trace; + + while (trace) { + (*trace->func)(trace->data, data->val); + trace = trace->next; + } + return Qnil; /* not reached */ +} + +static VALUE +trace_en(var) + struct global_variable *var; +{ + var->block_trace = 0; + remove_trace(var); + return Qnil; /* not reached */ +} + +VALUE +rb_gvar_set(entry, val) + struct global_entry *entry; + VALUE val; +{ + struct trace_data trace; + struct global_variable *var = entry->var; + + if (rb_safe_level() >= 4) + rb_raise(rb_eSecurityError, "Insecure: 
can't change global variable value"); + (*var->setter)(val, entry->id, var->data, var); + + if (var->trace && !var->block_trace) { + var->block_trace = 1; + trace.trace = var->trace; + trace.val = val; + rb_ensure(trace_ev, (VALUE)&trace, trace_en, (VALUE)var); + } + return val; +} + +VALUE +rb_gv_set(name, val) + const char *name; + VALUE val; +{ + struct global_entry *entry; + + entry = rb_global_entry(global_id(name)); + return rb_gvar_set(entry, val); +} + +VALUE +rb_gv_get(name) + const char *name; +{ + struct global_entry *entry; + + entry = rb_global_entry(global_id(name)); + return rb_gvar_get(entry); +} + +VALUE +rb_gvar_defined(entry) + struct global_entry *entry; +{ + if (entry->var->getter == undef_getter) return Qfalse; + return Qtrue; +} + +static int +gvar_i(key, entry, ary) + ID key; + struct global_entry *entry; + VALUE ary; +{ + rb_ary_push(ary, rb_str_new2(rb_id2name(key))); + return ST_CONTINUE; +} + +/* + * call-seq: + * global_variables => array + * + * Returns an array of the names of global variables. 
+ * + * global_variables.grep /std/ #=> ["$stderr", "$stdout", "$stdin"] + */ + +VALUE +rb_f_global_variables() +{ + VALUE ary = rb_ary_new(); + char buf[4]; + char *s = "&`'+123456789"; + + st_foreach_safe(rb_global_tbl, gvar_i, ary); + if (!NIL_P(rb_backref_get())) { + while (*s) { + sprintf(buf, "$%c", *s++); + rb_ary_push(ary, rb_str_new2(buf)); + } + } + return ary; +} + +void +rb_alias_variable(name1, name2) + ID name1; + ID name2; +{ + struct global_entry *entry1, *entry2; + + if (rb_safe_level() >= 4) + rb_raise(rb_eSecurityError, "Insecure: can't alias global variable"); + + entry2 = rb_global_entry(name2); + if (!st_lookup(rb_global_tbl, name1, (st_data_t *)&entry1)) { + entry1 = ALLOC(struct global_entry); + entry1->id = name1; + st_add_direct(rb_global_tbl, name1, (st_data_t)entry1); + } + else if (entry1->var != entry2->var) { + struct global_variable *var = entry1->var; + if (var->block_trace) { + rb_raise(rb_eRuntimeError, "can't alias in tracer"); + } + var->counter--; + if (var->counter == 0) { + struct trace_var *trace = var->trace; + while (trace) { + struct trace_var *next = trace->next; + free(trace); + trace = next; + } + free(var); + } + } + else { + return; + } + entry2->var->counter++; + entry1->var = entry2->var; +} + +static int special_generic_ivar = 0; +static st_table *generic_iv_tbl; + +st_table* +rb_generic_ivar_table(obj) + VALUE obj; +{ + st_table *tbl; + + if (!FL_TEST(obj, FL_EXIVAR)) return 0; + if (!generic_iv_tbl) return 0; + if (!st_lookup(generic_iv_tbl, obj, (st_data_t *)&tbl)) return 0; + return tbl; +} + +static VALUE +generic_ivar_get(obj, id) + VALUE obj; + ID id; +{ + st_table *tbl; + VALUE val; + + if (generic_iv_tbl) { + if (st_lookup(generic_iv_tbl, obj, (st_data_t *)&tbl)) { + if (st_lookup(tbl, id, &val)) { + return val; + } + } + } + + rb_warning("instance variable %s not initialized", rb_id2name(id)); + return Qnil; +} + +static void +generic_ivar_set(obj, id, val) + VALUE obj; + ID id; + VALUE val; +{ + 
st_table *tbl; + + if (rb_special_const_p(obj)) { + special_generic_ivar = 1; + } + if (!generic_iv_tbl) { + generic_iv_tbl = st_init_numtable(); + } + + if (!st_lookup(generic_iv_tbl, obj, (st_data_t *)&tbl)) { + FL_SET(obj, FL_EXIVAR); + tbl = st_init_numtable(); + st_add_direct(generic_iv_tbl, obj, (st_data_t)tbl); + st_add_direct(tbl, id, val); + return; + } + st_insert(tbl, id, val); +} + +static VALUE +generic_ivar_defined(obj, id) + VALUE obj; + ID id; +{ + st_table *tbl; + VALUE val; + + if (!generic_iv_tbl) return Qfalse; + if (!st_lookup(generic_iv_tbl, obj, (st_data_t *)&tbl)) return Qfalse; + if (st_lookup(tbl, id, &val)) { + return Qtrue; + } + return Qfalse; +} + +static int +generic_ivar_remove(obj, id, valp) + VALUE obj; + ID id; + VALUE *valp; +{ + st_table *tbl; + int status; + + if (!generic_iv_tbl) return 0; + if (!st_lookup(generic_iv_tbl, obj, (st_data_t *)&tbl)) return 0; + status = st_delete(tbl, &id, valp); + if (tbl->num_entries == 0) { + st_delete(generic_iv_tbl, &obj, (st_data_t *)&tbl); + st_free_table(tbl); + } + return status; +} + +void +rb_mark_generic_ivar(obj) + VALUE obj; +{ + st_table *tbl; + + if (!generic_iv_tbl) return; + if (st_lookup(generic_iv_tbl, obj, (st_data_t *)&tbl)) { + rb_mark_tbl(tbl); + } +} + +static int +givar_mark_i(key, value) + ID key; + VALUE value; +{ + rb_gc_mark(value); + return ST_CONTINUE; +} + +static int +givar_i(obj, tbl) + VALUE obj; + st_table *tbl; +{ + if (rb_special_const_p(obj)) { + st_foreach_safe(tbl, givar_mark_i, 0); + } + return ST_CONTINUE; +} + +void +rb_mark_generic_ivar_tbl() +{ + if (!generic_iv_tbl) return; + if (special_generic_ivar == 0) return; + st_foreach_safe(generic_iv_tbl, givar_i, 0); +} + +void +rb_free_generic_ivar(obj) + VALUE obj; +{ + st_table *tbl; + + if (!generic_iv_tbl) return; + if (st_delete(generic_iv_tbl, &obj, (st_data_t *)&tbl)) + st_free_table(tbl); +} + +void +rb_copy_generic_ivar(clone, obj) + VALUE clone, obj; +{ + st_table *tbl; + + if (!generic_iv_tbl) 
return; + if (!FL_TEST(obj, FL_EXIVAR)) return; + if (st_lookup(generic_iv_tbl, obj, (st_data_t *)&tbl)) { + st_table *old; + + if (st_lookup(generic_iv_tbl, clone, (st_data_t *)&old)) { + st_free_table(old); + st_insert(generic_iv_tbl, clone, (st_data_t)st_copy(tbl)); + } + else { + st_add_direct(generic_iv_tbl, clone, (st_data_t)st_copy(tbl)); + FL_SET(clone, FL_EXIVAR); + } + } +} + +static VALUE +ivar_get(obj, id, warn) + VALUE obj; + ID id; + int warn; +{ + VALUE val; + + switch (TYPE(obj)) { + case T_OBJECT: + case T_CLASS: + case T_MODULE: + if (ROBJECT(obj)->iv_tbl && st_lookup(ROBJECT(obj)->iv_tbl, id, &val)) + return val; + break; + default: + if (FL_TEST(obj, FL_EXIVAR) || rb_special_const_p(obj)) + return generic_ivar_get(obj, id); + break; + } + if (warn && ruby_verbose) { + rb_warning("instance variable %s not initialized", rb_id2name(id)); + } + + return Qnil; +} + +VALUE +rb_ivar_get(obj, id) + VALUE obj; + ID id; +{ + return ivar_get(obj, id, Qtrue); +} + +VALUE +rb_attr_get(obj, id) + VALUE obj; + ID id; +{ + return ivar_get(obj, id, Qfalse); +} + +VALUE +rb_ivar_set(obj, id, val) + VALUE obj; + ID id; + VALUE val; +{ + if (!OBJ_TAINTED(obj) && rb_safe_level() >= 4) + rb_raise(rb_eSecurityError, "Insecure: can't modify instance variable"); + if (OBJ_FROZEN(obj)) rb_error_frozen("object"); + switch (TYPE(obj)) { + case T_OBJECT: + case T_CLASS: + case T_MODULE: + if (!ROBJECT(obj)->iv_tbl) ROBJECT(obj)->iv_tbl = st_init_numtable(); + st_insert(ROBJECT(obj)->iv_tbl, id, val); + break; + default: + generic_ivar_set(obj, id, val); + break; + } + return val; +} + +VALUE +rb_ivar_defined(obj, id) + VALUE obj; + ID id; +{ + switch (TYPE(obj)) { + case T_OBJECT: + case T_CLASS: + case T_MODULE: + if (ROBJECT(obj)->iv_tbl && st_lookup(ROBJECT(obj)->iv_tbl, id, 0)) + return Qtrue; + break; + default: + if (FL_TEST(obj, FL_EXIVAR) || rb_special_const_p(obj)) + return generic_ivar_defined(obj, id); + break; + } + return Qfalse; +} + +static int +ivar_i(key, 
entry, ary) + ID key; + struct global_entry *entry; + VALUE ary; +{ + if (rb_is_instance_id(key)) { + rb_ary_push(ary, rb_str_new2(rb_id2name(key))); + } + return ST_CONTINUE; +} + +/* + * call-seq: + * obj.instance_variables => array + * + * Returns an array of instance variable names for the receiver. Note + * that simply defining an accessor does not create the corresponding + * instance variable. + * + * class Fred + * attr_accessor :a1 + * def initialize + * @iv = 3 + * end + * end + * Fred.new.instance_variables #=> ["@iv"] + */ + +VALUE +rb_obj_instance_variables(obj) + VALUE obj; +{ + VALUE ary; + + ary = rb_ary_new(); + switch (TYPE(obj)) { + case T_OBJECT: + case T_CLASS: + case T_MODULE: + if (ROBJECT(obj)->iv_tbl) { + st_foreach_safe(ROBJECT(obj)->iv_tbl, ivar_i, ary); + } + break; + default: + if (!generic_iv_tbl) break; + if (FL_TEST(obj, FL_EXIVAR) || rb_special_const_p(obj)) { + st_table *tbl; + + if (st_lookup(generic_iv_tbl, obj, (st_data_t *)&tbl)) { + st_foreach_safe(tbl, ivar_i, ary); + } + } + break; + } + return ary; +} + +/* + * call-seq: + * obj.remove_instance_variable(symbol) => obj + * + * Removes the named instance variable from <i>obj</i>, returning that + * variable's value. 
+ * + * class Dummy + * attr_reader :var + * def initialize + * @var = 99 + * end + * def remove + * remove_instance_variable(:@var) + * end + * end + * d = Dummy.new + * d.var #=> 99 + * d.remove #=> 99 + * d.var #=> nil + */ + +VALUE +rb_obj_remove_instance_variable(obj, name) + VALUE obj, name; +{ + VALUE val = Qnil; + ID id = rb_to_id(name); + + if (!OBJ_TAINTED(obj) && rb_safe_level() >= 4) + rb_raise(rb_eSecurityError, "Insecure: can't modify instance variable"); + if (OBJ_FROZEN(obj)) rb_error_frozen("object"); + if (!rb_is_instance_id(id)) { + rb_name_error(id, "`%s' is not allowed as an instance variable name", rb_id2name(id)); + } + + switch (TYPE(obj)) { + case T_OBJECT: + case T_CLASS: + case T_MODULE: + if (ROBJECT(obj)->iv_tbl && st_delete(ROBJECT(obj)->iv_tbl, (st_data_t*)&id, &val)) { + return val; + } + break; + default: + if (FL_TEST(obj, FL_EXIVAR) || rb_special_const_p(obj)) { + if (generic_ivar_remove(obj, id, &val)) { + return val; + } + } + break; + } + rb_name_error(id, "instance variable %s not defined", rb_id2name(id)); + return Qnil; /* not reached */ +} + +NORETURN(static void uninitialized_constant _((VALUE, ID))); +static void +uninitialized_constant(klass, id) + VALUE klass; + ID id; +{ + if (klass && klass != rb_cObject) + rb_name_error(id, "uninitialized constant %s::%s", + rb_class2name(klass), + rb_id2name(id)); + else { + rb_name_error(id, "uninitialized constant %s", rb_id2name(id)); + } +} + +static VALUE +const_missing(klass, id) + VALUE klass; + ID id; +{ + return rb_funcall(klass, rb_intern("const_missing"), 1, ID2SYM(id)); +} + + +/* + * call-seq: + * mod.const_missing(sym) => obj + * + * Invoked when a reference is made to an undefined constant in + * <i>mod</i>. It is passed a symbol for the undefined constant, and + * returns a value to be used for that constant. 
The + * following code is a (very bad) example: if reference is made to + * an undefined constant, it attempts to load a file whose name is + * the lowercase version of the constant (thus class <code>Fred</code> is + * assumed to be in file <code>fred.rb</code>). If found, it returns the + * value of the loaded class. It therefore implements a perverse + * kind of autoload facility. + * + * def Object.const_missing(name) + * @looked_for ||= {} + * str_name = name.to_s + * raise "Class not found: #{name}" if @looked_for[str_name] + * @looked_for[str_name] = 1 + * file = str_name.downcase + * require file + * klass = const_get(name) + * return klass if klass + * raise "Class not found: #{name}" + * end + * + */ + +VALUE +rb_mod_const_missing(klass, name) + VALUE klass, name; +{ + ruby_frame = ruby_frame->prev; /* pop frame for "const_missing" */ + uninitialized_constant(klass, rb_to_id(name)); + return Qnil; /* not reached */ +} + +static struct st_table * +check_autoload_table(av) + VALUE av; +{ + Check_Type(av, T_DATA); + if (RDATA(av)->dmark != (RUBY_DATA_FUNC)rb_mark_tbl || + RDATA(av)->dfree != (RUBY_DATA_FUNC)st_free_table) { + rb_raise(rb_eTypeError, "wrong autoload table: %s", RSTRING(rb_inspect(av))->ptr); + } + return (struct st_table *)DATA_PTR(av); +} + +void +rb_autoload(mod, id, file) + VALUE mod; + ID id; + const char *file; +{ + VALUE av, fn; + struct st_table *tbl; + + if (!rb_is_const_id(id)) { + rb_raise(rb_eNameError, "autoload must be constant name", rb_id2name(id)); + } + if (!file || !*file) { + rb_raise(rb_eArgError, "empty file name"); + } + + if ((tbl = RCLASS(mod)->iv_tbl) && st_lookup(tbl, id, &av) && av != Qundef) + return; + + rb_const_set(mod, id, Qundef); + tbl = RCLASS(mod)->iv_tbl; + if (st_lookup(tbl, autoload, &av)) { + tbl = check_autoload_table(av); + } + else { + av = Data_Wrap_Struct(0, rb_mark_tbl, st_free_table, 0); + st_add_direct(tbl, autoload, av); + DATA_PTR(av) = tbl = st_init_numtable(); + } + fn = rb_str_new2(file); + 
FL_UNSET(fn, FL_TAINT); + OBJ_FREEZE(fn); + st_insert(tbl, id, (st_data_t)rb_node_newnode(NODE_MEMO, fn, ruby_safe_level, 0)); +} + +static NODE* +autoload_delete(mod, id) + VALUE mod; + ID id; +{ + VALUE val; + st_data_t load = 0; + + st_delete(RCLASS(mod)->iv_tbl, (st_data_t*)&id, 0); + if (st_lookup(RCLASS(mod)->iv_tbl, autoload, &val)) { + struct st_table *tbl = check_autoload_table(val); + + st_delete(tbl, (st_data_t*)&id, &load); + + if (tbl->num_entries == 0) { + DATA_PTR(val) = 0; + st_free_table(tbl); + id = autoload; + if (st_delete(RCLASS(mod)->iv_tbl, (st_data_t*)&id, &val)) { + rb_gc_force_recycle(val); + } + } + } + + return (NODE *)load; +} + +void +rb_autoload_load(klass, id) + VALUE klass; + ID id; +{ + VALUE file; + NODE *load = autoload_delete(klass, id); + + if (!load || !(file = load->nd_lit) || rb_provided(RSTRING(file)->ptr)) { + const_missing(klass, id); + } + rb_require_safe(file, load->nd_nth); +} + +static VALUE +autoload_file(mod, id) + VALUE mod; + ID id; +{ + VALUE val, file; + struct st_table *tbl; + st_data_t load; + + if (!st_lookup(RCLASS(mod)->iv_tbl, autoload, &val) || + !(tbl = check_autoload_table(val)) || !st_lookup(tbl, id, &load)) { + return Qnil; + } + file = ((NODE *)load)->nd_lit; + Check_Type(file, T_STRING); + if (!RSTRING(file)->ptr) { + rb_raise(rb_eArgError, "empty file name"); + } + if (!rb_provided(RSTRING(file)->ptr)) { + return file; + } + + /* already loaded but not defined */ + st_delete(tbl, (st_data_t*)&id, 0); + if (!tbl->num_entries) { + DATA_PTR(val) = 0; + st_free_table(tbl); + id = autoload; + if (st_delete(RCLASS(mod)->iv_tbl, (st_data_t*)&id, &val)) { + rb_gc_force_recycle(val); + } + } + return Qnil; +} + +VALUE +rb_autoload_p(mod, id) + VALUE mod; + ID id; +{ + struct st_table *tbl = RCLASS(mod)->iv_tbl; + VALUE val; + + if (!tbl || !st_lookup(tbl, id, &val) || val != Qundef) { + return Qnil; + } + return autoload_file(mod, id); +} + +static VALUE +rb_const_get_0(klass, id, exclude, recurse) + VALUE 
klass; + ID id; + int exclude, recurse; +{ + VALUE value, tmp; + int mod_retry = 0; + + tmp = klass; + retry: + while (tmp) { + while (RCLASS(tmp)->iv_tbl && st_lookup(RCLASS(tmp)->iv_tbl,id,&value)) { + if (value == Qundef) { + rb_autoload_load(tmp, id); + continue; + } + if (exclude && tmp == rb_cObject && klass != rb_cObject) { + rb_warn("toplevel constant %s referenced by %s::%s", + rb_id2name(id), rb_class2name(klass), rb_id2name(id)); + } + return value; + } + if (!recurse && klass != rb_cObject) break; + tmp = RCLASS(tmp)->super; + } + if (!exclude && !mod_retry && BUILTIN_TYPE(klass) == T_MODULE) { + mod_retry = 1; + tmp = rb_cObject; + goto retry; + } + + return const_missing(klass, id); +} + +VALUE +rb_const_get_from(klass, id) + VALUE klass; + ID id; +{ + return rb_const_get_0(klass, id, Qtrue, Qtrue); +} + +VALUE +rb_const_get(klass, id) + VALUE klass; + ID id; +{ + return rb_const_get_0(klass, id, Qfalse, Qtrue); +} + +VALUE +rb_const_get_at(klass, id) + VALUE klass; + ID id; +{ + return rb_const_get_0(klass, id, Qtrue, Qfalse); +} + +/* + * call-seq: + * remove_const(sym) => obj + * + * Removes the definition of the given constant, returning that + * constant's value. Predefined classes and singleton objects (such as + * <i>true</i>) cannot be removed. 
+ */ + +VALUE +rb_mod_remove_const(mod, name) + VALUE mod, name; +{ + ID id = rb_to_id(name); + VALUE val; + + if (!rb_is_const_id(id)) { + rb_name_error(id, "`%s' is not allowed as a constant name", rb_id2name(id)); + } + if (!OBJ_TAINTED(mod) && rb_safe_level() >= 4) + rb_raise(rb_eSecurityError, "Insecure: can't remove constant"); + if (OBJ_FROZEN(mod)) rb_error_frozen("class/module"); + + if (RCLASS(mod)->iv_tbl && st_delete(ROBJECT(mod)->iv_tbl, (st_data_t*)&id, &val)) { + if (val == Qundef) { + autoload_delete(mod, id); + val = Qnil; + } + return val; + } + if (rb_const_defined_at(mod, id)) { + rb_name_error(id, "cannot remove %s::%s", + rb_class2name(mod), rb_id2name(id)); + } + rb_name_error(id, "constant %s::%s not defined", + rb_class2name(mod), rb_id2name(id)); + return Qnil; /* not reached */ +} + +static int +sv_i(key, value, tbl) + ID key; + VALUE value; + st_table *tbl; +{ + if (rb_is_const_id(key)) { + if (!st_lookup(tbl, key, 0)) { + st_insert(tbl, key, key); + } + } + return ST_CONTINUE; +} + +void* +rb_mod_const_at(mod, data) + VALUE mod; + void *data; +{ + st_table *tbl = data; + if (!tbl) { + tbl = st_init_numtable(); + } + if (RCLASS(mod)->iv_tbl) { + st_foreach_safe(RCLASS(mod)->iv_tbl, sv_i, (st_data_t)tbl); + } + return tbl; +} + +void* +rb_mod_const_of(mod, data) + VALUE mod; + void *data; +{ + VALUE tmp = mod; + for (;;) { + data = rb_mod_const_at(tmp, data); + tmp = RCLASS(tmp)->super; + if (!tmp) break; + if (tmp == rb_cObject && mod != rb_cObject) break; + } + return data; +} + +static int +list_i(key, value, ary) + ID key, value; + VALUE ary; +{ + rb_ary_push(ary, rb_str_new2(rb_id2name(key))); + return ST_CONTINUE; +} + +VALUE +rb_const_list(data) + void *data; +{ + st_table *tbl = data; + VALUE ary; + + if (!tbl) return rb_ary_new2(0); + ary = rb_ary_new2(tbl->num_entries); + st_foreach_safe(tbl, list_i, ary); + st_free_table(tbl); + + return ary; +} + +/* + * call-seq: + * mod.constants => array + * + * Returns an array of the 
names of the constants accessible in + * <i>mod</i>. This includes the names of constants in any included + * modules (example at start of section). + */ + +VALUE +rb_mod_constants(mod) + VALUE mod; +{ + return rb_const_list(rb_mod_const_of(mod, 0)); +} + +static int +rb_const_defined_0(klass, id, exclude, recurse) + VALUE klass; + ID id; + int exclude, recurse; +{ + VALUE value, tmp; + int mod_retry = 0; + + tmp = klass; + retry: + while (tmp) { + if (RCLASS(tmp)->iv_tbl && st_lookup(RCLASS(tmp)->iv_tbl, id, &value)) { + if (value == Qundef && NIL_P(autoload_file(klass, id))) + return Qfalse; + return Qtrue; + } + if (!recurse && klass != rb_cObject) break; + tmp = RCLASS(tmp)->super; + } + if (!exclude && !mod_retry && BUILTIN_TYPE(klass) == T_MODULE) { + mod_retry = 1; + tmp = rb_cObject; + goto retry; + } + return Qfalse; +} + +int +rb_const_defined_from(klass, id) + VALUE klass; + ID id; +{ + return rb_const_defined_0(klass, id, Qtrue, Qtrue); +} + +int +rb_const_defined(klass, id) + VALUE klass; + ID id; +{ + return rb_const_defined_0(klass, id, Qfalse, Qtrue); +} + +int +rb_const_defined_at(klass, id) + VALUE klass; + ID id; +{ + return rb_const_defined_0(klass, id, Qtrue, Qfalse); +} + +static void +mod_av_set(klass, id, val, isconst) + VALUE klass; + ID id; + VALUE val; + int isconst; +{ + char *dest = isconst ? 
"constant" : "class variable"; + + if (!OBJ_TAINTED(klass) && rb_safe_level() >= 4) + rb_raise(rb_eSecurityError, "Insecure: can't set %s", dest); + if (OBJ_FROZEN(klass)) { + if (BUILTIN_TYPE(klass) == T_MODULE) { + rb_error_frozen("module"); + } + else { + rb_error_frozen("class"); + } + } + if (!RCLASS(klass)->iv_tbl) { + RCLASS(klass)->iv_tbl = st_init_numtable(); + } + else if (isconst) { + VALUE value = Qfalse; + + if (st_lookup(RCLASS(klass)->iv_tbl, id, &value)) { + if (value == Qundef) + autoload_delete(klass, id); + else + rb_warn("already initialized %s %s", dest, rb_id2name(id)); + } + } + + st_insert(RCLASS(klass)->iv_tbl, id, val); +} + +void +rb_const_set(klass, id, val) + VALUE klass; + ID id; + VALUE val; +{ + mod_av_set(klass, id, val, Qtrue); +} + +void +rb_define_const(klass, name, val) + VALUE klass; + const char *name; + VALUE val; +{ + ID id = rb_intern(name); + + if (!rb_is_const_id(id)) { + rb_warn("rb_define_const: invalide name `%s' for constant", name); + } + if (klass == rb_cObject) { + rb_secure(4); + } + rb_const_set(klass, id, val); +} + +void +rb_define_global_const(name, val) + const char *name; + VALUE val; +{ + rb_define_const(rb_cObject, name, val); +} + +void +rb_cvar_set(klass, id, val, warn) + VALUE klass; + ID id; + VALUE val; + int warn; +{ + mod_av_set(klass, id, val, Qfalse); +} + +VALUE +rb_cvar_get(klass, id) + VALUE klass; + ID id; +{ + VALUE value; + + if (RCLASS(klass)->iv_tbl && st_lookup(RCLASS(klass)->iv_tbl,id,&value)) { + return value; + } + + rb_name_error(id,"uninitialized class variable %s in %s", + rb_id2name(id), rb_class2name(klass)); + return Qnil; /* not reached */ +} + +VALUE +rb_cvar_defined(klass, id) + VALUE klass; + ID id; +{ + if (RCLASS(klass)->iv_tbl && st_lookup(RCLASS(klass)->iv_tbl,id,0)) { + return Qtrue; + } + return Qfalse; +} + +void +rb_cv_set(klass, name, val) + VALUE klass; + const char *name; + VALUE val; +{ + ID id = rb_intern(name); + if (!rb_is_class_id(id)) { + rb_name_error(id, 
"wrong class variable name %s", name); + } + rb_cvar_set(klass, id, val, Qfalse); +} + +VALUE +rb_cv_get(klass, name) + VALUE klass; + const char *name; +{ + ID id = rb_intern(name); + if (!rb_is_class_id(id)) { + rb_name_error(id, "wrong class variable name %s", name); + } + return rb_cvar_get(klass, id); +} + +void +rb_define_class_variable(klass, name, val) + VALUE klass; + const char *name; + VALUE val; +{ + ID id = rb_intern(name); + + if (!rb_is_class_id(id)) { + rb_name_error(id, "wrong class variable name %s", name); + } + rb_cvar_set(klass, id, val, Qtrue); +} + +static int +cv_i(key, value, ary) + ID key; + VALUE value; + VALUE ary; +{ + if (rb_is_class_id(key)) { + VALUE kval = rb_str_new2(rb_id2name(key)); + if (!rb_ary_includes(ary, kval)) { + rb_ary_push(ary, kval); + } + } + return ST_CONTINUE; +} + +/* + * call-seq: + * mod.class_variables => array + * + * Returns an array of the names of class variables in <i>mod</i>. + * + * class One + * @@var1 = 1 + * end + * class Two < One + * @@var2 = 2 + * end + * One.class_variables #=> ["@@var1"] + * Two.class_variables #=> ["@@var2"] + */ + +VALUE +rb_mod_class_variables(obj) + VALUE obj; +{ + VALUE ary = rb_ary_new(); + + if (RCLASS(obj)->iv_tbl) { + st_foreach_safe(RCLASS(obj)->iv_tbl, cv_i, ary); + } + return ary; +} + +/* + * call-seq: + * remove_class_variable(sym) => obj + * + * Removes the definition of the <i>sym</i>, returning that + * constant's value. + * + * class Dummy + * @@var = 99 + * puts @@var + * remove_class_variable(:@@var) + * puts(defined? 
@@var) + * end + * + * <em>produces:</em> + * + * 99 + * nil + */ + +VALUE +rb_mod_remove_cvar(mod, name) + VALUE mod, name; +{ + ID id = rb_to_id(name); + VALUE val; + + if (!rb_is_class_id(id)) { + rb_name_error(id, "wrong class variable name %s", rb_id2name(id)); + } + if (!OBJ_TAINTED(mod) && rb_safe_level() >= 4) + rb_raise(rb_eSecurityError, "Insecure: can't remove class variable"); + if (OBJ_FROZEN(mod)) rb_error_frozen("class/module"); + + if (RCLASS(mod)->iv_tbl && st_delete(ROBJECT(mod)->iv_tbl, (st_data_t*)&id, &val)) { + return val; + } + if (rb_cvar_defined(mod, id)) { + rb_name_error(id, "cannot remove %s for %s", + rb_id2name(id), rb_class2name(mod)); + } + rb_name_error(id, "class variable %s not defined for %s", + rb_id2name(id), rb_class2name(mod)); + return Qnil; /* not reached */ +} + +VALUE +rb_iv_get(obj, name) + VALUE obj; + const char *name; +{ + ID id = rb_intern(name); + + return rb_ivar_get(obj, id); +} + +VALUE +rb_iv_set(obj, name, val) + VALUE obj; + const char *name; + VALUE val; +{ + ID id = rb_intern(name); + + return rb_ivar_set(obj, id, val); +} +/********************************************************************** + + version.c - + + $Author: nobu $ + $Date: 2004/03/25 12:01:40 $ + created at: Thu Sep 30 20:08:01 JST 1993 + + Copyright (C) 1993-2003 Yukihiro Matsumoto + +**********************************************************************/ + +#include "ruby.h" +#include "version.h" +#include <stdio.h> + +const char ruby_version[] = RUBY_VERSION; +const char ruby_release_date[] = RUBY_RELEASE_DATE; +const char ruby_platform[] = RUBY_PLATFORM; + +void +Init_version() +{ + VALUE v = rb_obj_freeze(rb_str_new2(ruby_version)); + VALUE d = rb_obj_freeze(rb_str_new2(ruby_release_date)); + VALUE p = rb_obj_freeze(rb_str_new2(ruby_platform)); + + rb_define_global_const("RUBY_VERSION", v); + rb_define_global_const("RUBY_RELEASE_DATE", d); + rb_define_global_const("RUBY_PLATFORM", p); +} + +void +ruby_show_version() +{ + printf("ruby %s 
(%s) [%s]\n", RUBY_VERSION, RUBY_RELEASE_DATE, RUBY_PLATFORM); +} + +void +ruby_show_copyright() +{ + printf("ruby - Copyright (C) 1993-%d Yukihiro Matsumoto\n", RUBY_RELEASE_YEAR); + exit(0); +} diff --git a/bench/example.cpp b/bench/example.cpp new file mode 100644 index 0000000..ba9cf72 --- /dev/null +++ b/bench/example.cpp @@ -0,0 +1,13544 @@ +/*************************************************************************** + ansigenerator.cpp - description + ------------------- + begin : Jul 5 2004 + copyright : (C) 2004 by André Simon + email : andre.simon1@gmx.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include "ansigenerator.h" + +using namespace std; + +namespace highlight { + + +string AnsiGenerator::getOpenTag(const string&font, + const string&fgCol, const string&bgCol) { + ostringstream s; + s << "\033["<<font; + if (!fgCol.empty()) + s<<";"<<fgCol; + if (!bgCol.empty()) + s<<";"<<bgCol; + s << "m"; + return s.str(); +} + + +AnsiGenerator::AnsiGenerator(const string &colourTheme) + : CodeGenerator(colourTheme) { + styleTagOpen.push_back(""); + styleTagOpen.push_back(getOpenTag("00", "31")); //str + styleTagOpen.push_back(getOpenTag("00", "34"));//number + styleTagOpen.push_back(getOpenTag("00", "34"));//sl comment + styleTagOpen.push_back(getOpenTag("00", "34"));//ml comment + styleTagOpen.push_back(getOpenTag("00", "35"));//escapeChar + styleTagOpen.push_back(getOpenTag("00", "35"));//directive + styleTagOpen.push_back(getOpenTag("01", "31"));//directive string + 
styleTagOpen.push_back(getOpenTag("00", "30"));//linenum + styleTagOpen.push_back(getOpenTag("01", "00"));//symbol + + styleTagClose.push_back(""); + for (int i=1;i<NUMBER_BUILTIN_STYLES; i++) { + styleTagClose.push_back("\033[m"); + } + newLineTag = "\n"; + spacer = " "; +} + +AnsiGenerator::AnsiGenerator() {} +AnsiGenerator::~AnsiGenerator() {} + +string AnsiGenerator::getHeader(const string & title) { + return string(); +} + +void AnsiGenerator::printBody() { + processRootState(); +} + +string AnsiGenerator::getFooter() { + return string(); +} + +string AnsiGenerator::maskCharacter(unsigned char c) { + string m; + m+=c; + return m; +} + +string AnsiGenerator::getMatchingOpenTag(unsigned int styleID) { + return (styleID)?getOpenTag("01", "32", ""):getOpenTag("00", "33"); +} + +string AnsiGenerator::getMatchingCloseTag(unsigned int styleID) { + return "\033[m"; +} + +} +/*************************************************************************** + ansicode.h - description + ------------------- + begin : Jul 5 2004 + copyright : (C) 2004 by Andre Simon + email : andre.simon1@gmx.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#ifndef ANSIGENERATOR_H +#define ANSIGENERATOR_H + +#include <iostream> +#include <fstream> +#include <string> +#include <sstream> + +#include "codegenerator.h" +#include "charcodes.h" +#include "version.h" + +namespace highlight { + +/** + \brief This class generates ANSI escape sequences. 
+ + It contains information about the resulting document structure (document + header and footer), the colour system, white space handling and text + formatting attributes. + +* @author Andre Simon +*/ + +class AnsiGenerator : public highlight::CodeGenerator + { + public: + + /** Constructor + \param colourTheme Name of Colour theme to use + */ + AnsiGenerator( const string &colourTheme); + AnsiGenerator(); + ~AnsiGenerator(); + + /** prints document header + \param title Title of the document + */ + string getHeader(const string & title); + + /** Prints document footer*/ + string getFooter(); + + /** Prints document body*/ + void printBody(); + + private: + + /** \return escaped character*/ + virtual string maskCharacter(unsigned char ); + + + /** gibt ANSI-"Tags" zurueck (Farbindex+bold+kursiv)*/ + string getOpenTag(const string&font, + const string&fgCol, const string&bgCol=""); + + + + string getMatchingOpenTag(unsigned int styleID); + string getMatchingCloseTag(unsigned int styleID); + }; + +} +#endif +/* + * Copyright (c) 1998,1999,2000,2001,2002 Tal Davidson. All rights reserved. + * + * ASBeautifier.cpp + * by Tal Davidson (davidsont@bigfoot.com) + * This file is a part of "Artistic Style" - an indentater and reformatter + * of C, C, C# and Java source files. + * + * The "Artistic Style" project, including all files needed to compile it, + * is free software; you can redistribute it and/or use it and/or modify it + * under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, + * or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU General Public + * License along with this program. 
+ * + * Patches: + * 18 March 1999 - Brian Rampel - + * Fixed inverse insertion of spaces vs. tabs when in -t mode. + * 08 may 2004 + * applied ASBeautifier.cpp.BITFIELD.patch.bz2 + */ + +#include "compiler_defines.h" +#include "ASBeautifier.h" + +#include <vector> +#include <string> +#include <cctype> +#include <algorithm> +#include <iostream> + + +#define INIT_CONTAINER(container, value) {if ( (container) != NULL ) delete (container); (container) = (value); } +#define DELETE_CONTAINER(container) {if ( (container) != NULL ) delete (container) ; } + +#ifdef USES_NAMESPACE +using namespace std; +#endif + + + + +#ifdef USES_NAMESPACE +namespace astyle + { +#endif + + bool ASBeautifier::calledInitStatic = false; + + vector<const string*> ASBeautifier::headers; + vector<const string*> ASBeautifier::nonParenHeaders; + vector<const string*> ASBeautifier::preBlockStatements; + vector<const string*> ASBeautifier::assignmentOperators; + vector<const string*> ASBeautifier::nonAssignmentOperators; + + /* + * initialize the static vars + */ + void ASBeautifier::initStatic() + { + if (calledInitStatic) + return; + + calledInitStatic = true; + + headers.push_back(&AS_IF); + headers.push_back(&AS_ELSE); + headers.push_back(&AS_FOR); + headers.push_back(&AS_WHILE); + headers.push_back(&AS_DO); + headers.push_back(&AS_TRY); + headers.push_back(&AS_CATCH); + headers.push_back(&AS_FINALLY); + headers.push_back(&AS_SYNCHRONIZED); + headers.push_back(&AS_SWITCH); + headers.push_back(&AS_CASE); + headers.push_back(&AS_DEFAULT); + headers.push_back(&AS_FOREACH); + headers.push_back(&AS_LOCK); + headers.push_back(&AS_UNSAFE); + headers.push_back(&AS_FIXED); + headers.push_back(&AS_GET); + headers.push_back(&AS_SET); + headers.push_back(&AS_ADD); + headers.push_back(&AS_REMOVE); + //headers.push_back(&AS_PUBLIC); + //headers.push_back(&AS_PRIVATE); + //headers.push_back(&AS_PROTECTED); + + //headers.push_back(&AS_OPERATOR); + headers.push_back(&AS_TEMPLATE); + headers.push_back(&AS_CONST); 
+ /**/ + headers.push_back(&AS_STATIC); + headers.push_back(&AS_EXTERN); + + nonParenHeaders.push_back(&AS_ELSE); + nonParenHeaders.push_back(&AS_DO); + nonParenHeaders.push_back(&AS_TRY); + nonParenHeaders.push_back(&AS_FINALLY); + nonParenHeaders.push_back(&AS_STATIC); + nonParenHeaders.push_back(&AS_CONST); + nonParenHeaders.push_back(&AS_EXTERN); + nonParenHeaders.push_back(&AS_CASE); + nonParenHeaders.push_back(&AS_DEFAULT); + nonParenHeaders.push_back(&AS_UNSAFE); + nonParenHeaders.push_back(&AS_GET); + nonParenHeaders.push_back(&AS_SET); + nonParenHeaders.push_back(&AS_ADD); + nonParenHeaders.push_back(&AS_REMOVE); + + + + nonParenHeaders.push_back(&AS_PUBLIC); + nonParenHeaders.push_back(&AS_PRIVATE); + nonParenHeaders.push_back(&AS_PROTECTED); + nonParenHeaders.push_back(&AS_TEMPLATE); + nonParenHeaders.push_back(&AS_CONST); + /// nonParenHeaders.push_back(&AS_ASM); + + preBlockStatements.push_back(&AS_CLASS); + preBlockStatements.push_back(&AS_STRUCT); + preBlockStatements.push_back(&AS_UNION); + preBlockStatements.push_back(&AS_INTERFACE); + preBlockStatements.push_back(&AS_NAMESPACE); + preBlockStatements.push_back(&AS_THROWS); + preBlockStatements.push_back(&AS_EXTERN); + + assignmentOperators.push_back(&AS_ASSIGN); + assignmentOperators.push_back(&AS_PLUS_ASSIGN); + assignmentOperators.push_back(&AS_MINUS_ASSIGN); + assignmentOperators.push_back(&AS_MULT_ASSIGN); + assignmentOperators.push_back(&AS_DIV_ASSIGN); + assignmentOperators.push_back(&AS_MOD_ASSIGN); + assignmentOperators.push_back(&AS_OR_ASSIGN); + assignmentOperators.push_back(&AS_AND_ASSIGN); + assignmentOperators.push_back(&AS_XOR_ASSIGN); + assignmentOperators.push_back(&AS_GR_GR_GR_ASSIGN); + assignmentOperators.push_back(&AS_GR_GR_ASSIGN); + assignmentOperators.push_back(&AS_LS_LS_LS_ASSIGN); + assignmentOperators.push_back(&AS_LS_LS_ASSIGN); + + assignmentOperators.push_back(&AS_RETURN); + + nonAssignmentOperators.push_back(&AS_EQUAL); + 
nonAssignmentOperators.push_back(&AS_PLUS_PLUS); + nonAssignmentOperators.push_back(&AS_MINUS_MINUS); + nonAssignmentOperators.push_back(&AS_NOT_EQUAL); + nonAssignmentOperators.push_back(&AS_GR_EQUAL); + nonAssignmentOperators.push_back(&AS_GR_GR_GR); + nonAssignmentOperators.push_back(&AS_GR_GR); + nonAssignmentOperators.push_back(&AS_LS_EQUAL); + nonAssignmentOperators.push_back(&AS_LS_LS_LS); + nonAssignmentOperators.push_back(&AS_LS_LS); + nonAssignmentOperators.push_back(&AS_ARROW); + nonAssignmentOperators.push_back(&AS_AND); + nonAssignmentOperators.push_back(&AS_OR); + } + + /** + * ASBeautifier's constructor + */ + ASBeautifier::ASBeautifier() + { + initStatic(); + + waitingBeautifierStack = NULL; + activeBeautifierStack = NULL; + waitingBeautifierStackLengthStack = NULL; + activeBeautifierStackLengthStack = NULL; + + headerStack = NULL; + tempStacks = NULL; + blockParenDepthStack = NULL; + blockStatementStack = NULL; + parenStatementStack = NULL; + bracketBlockStateStack = NULL; + inStatementIndentStack = NULL; + inStatementIndentStackSizeStack = NULL; + parenIndentStack = NULL; + sourceIterator = NULL; + + isMinimalConditinalIndentSet = false; + shouldForceTabIndentation = false; + + setSpaceIndentation(4); + setMaxInStatementIndentLength(40); + setClassIndent(false); + setSwitchIndent(false); + setCaseIndent(false); + setBlockIndent(false); + setBracketIndent(false); + setNamespaceIndent(false); + setLabelIndent(false); + setEmptyLineFill(false); + setCStyle(); + setPreprocessorIndent(false); + } + + ASBeautifier::ASBeautifier(const ASBeautifier &other) + { + waitingBeautifierStack = NULL; + activeBeautifierStack = NULL; + waitingBeautifierStackLengthStack = NULL; + activeBeautifierStackLengthStack = NULL; + + headerStack = new vector<const string*>; + *headerStack = *other.headerStack; + + tempStacks = new vector< vector<const string*>* >; + vector< vector<const string*>* >::iterator iter; + for (iter = other.tempStacks->begin(); + iter != 
other.tempStacks->end(); + ++iter) + { + vector<const string*> *newVec = new vector<const string*>; + *newVec = **iter; + tempStacks->push_back(newVec); + } + blockParenDepthStack = new vector<int>; + *blockParenDepthStack = *other.blockParenDepthStack; + + blockStatementStack = new vector<bool>; + *blockStatementStack = *other.blockStatementStack; + + parenStatementStack = new vector<bool>; + *parenStatementStack = *other.parenStatementStack; + + bracketBlockStateStack = new vector<bool>; + *bracketBlockStateStack = *other.bracketBlockStateStack; + + inStatementIndentStack = new vector<int>; + *inStatementIndentStack = *other.inStatementIndentStack; + + inStatementIndentStackSizeStack = new vector<int>; + *inStatementIndentStackSizeStack = *other.inStatementIndentStackSizeStack; + + parenIndentStack = new vector<int>; + *parenIndentStack = *other.parenIndentStack; + + sourceIterator = other.sourceIterator; + + indentString = other.indentString; + currentHeader = other.currentHeader; + previousLastLineHeader = other.previousLastLineHeader; + immediatelyPreviousAssignmentOp = other.immediatelyPreviousAssignmentOp; + isInQuote = other.isInQuote; + isInComment = other.isInComment; + isInCase = other.isInCase; + isInQuestion = other.isInQuestion; + isInStatement =other. 
isInStatement; + isInHeader = other.isInHeader; + isCStyle = other.isCStyle; + isInOperator = other.isInOperator; + isInTemplate = other.isInTemplate; + isInConst = other.isInConst; + classIndent = other.classIndent; + isInClassHeader = other.isInClassHeader; + isInClassHeaderTab = other.isInClassHeaderTab; + switchIndent = other.switchIndent; + caseIndent = other.caseIndent; + namespaceIndent = other.namespaceIndent; + bracketIndent = other.bracketIndent; + blockIndent = other.blockIndent; + labelIndent = other.labelIndent; + preprocessorIndent = other.preprocessorIndent; + parenDepth = other.parenDepth; + indentLength = other.indentLength; + blockTabCount = other.blockTabCount; + leadingWhiteSpaces = other.leadingWhiteSpaces; + maxInStatementIndent = other.maxInStatementIndent; + templateDepth = other.templateDepth; + quoteChar = other.quoteChar; + prevNonSpaceCh = other.prevNonSpaceCh; + currentNonSpaceCh = other.currentNonSpaceCh; + currentNonLegalCh = other.currentNonLegalCh; + prevNonLegalCh = other.prevNonLegalCh; + isInConditional = other.isInConditional; + minConditionalIndent = other.minConditionalIndent; + prevFinalLineSpaceTabCount = other.prevFinalLineSpaceTabCount; + prevFinalLineTabCount = other.prevFinalLineTabCount; + emptyLineFill = other.emptyLineFill; + probationHeader = other.probationHeader; + isInDefine = other.isInDefine; + isInDefineDefinition = other.isInDefineDefinition; + backslashEndsPrevLine = other.backslashEndsPrevLine; + defineTabCount = other.defineTabCount; + } + + /** + * ASBeautifier's destructor + */ + ASBeautifier::~ASBeautifier() + { + DELETE_CONTAINER( headerStack ); + DELETE_CONTAINER( tempStacks ); + DELETE_CONTAINER( blockParenDepthStack ); + DELETE_CONTAINER( blockStatementStack ); + DELETE_CONTAINER( parenStatementStack ); + DELETE_CONTAINER( bracketBlockStateStack ); + DELETE_CONTAINER( inStatementIndentStack ); + DELETE_CONTAINER( inStatementIndentStackSizeStack ); + DELETE_CONTAINER( parenIndentStack ); + + // 
DELETE_CONTAINER( sourceIterator ); + } + + /** + * initialize the ASBeautifier. + * + * init() should be called every time a ABeautifier object is to start + * beautifying a NEW source file. + * init() recieves a pointer to a DYNAMICALLY CREATED ASSourceIterator object + * that will be used to iterate through the source code. This object will be + * deleted during the ASBeautifier's destruction, and thus should not be + * deleted elsewhere. + * + * @param iter a pointer to the DYNAMICALLY CREATED ASSourceIterator object. + */ + void ASBeautifier::init(ASSourceIterator *iter) + + { + sourceIterator = iter; + init(); + } + + /** + * initialize the ASBeautifier. + */ + void ASBeautifier::init() + { + INIT_CONTAINER( waitingBeautifierStack, new vector<ASBeautifier*> ); + INIT_CONTAINER( activeBeautifierStack, new vector<ASBeautifier*> ); + + INIT_CONTAINER( waitingBeautifierStackLengthStack, new vector<int> ); + INIT_CONTAINER( activeBeautifierStackLengthStack, new vector<int> ); + + INIT_CONTAINER( headerStack, new vector<const string*> ); + INIT_CONTAINER( tempStacks, new vector< vector<const string*>* > ); + tempStacks->push_back(new vector<const string*>); + + INIT_CONTAINER( blockParenDepthStack, new vector<int> ); + INIT_CONTAINER( blockStatementStack, new vector<bool> ); + INIT_CONTAINER( parenStatementStack, new vector<bool> ); + + INIT_CONTAINER( bracketBlockStateStack, new vector<bool> ); + bracketBlockStateStack->push_back(true); + + INIT_CONTAINER( inStatementIndentStack, new vector<int> ); + INIT_CONTAINER( inStatementIndentStackSizeStack, new vector<int> ); + inStatementIndentStackSizeStack->push_back(0); + INIT_CONTAINER( parenIndentStack, new vector<int> ); + + immediatelyPreviousAssignmentOp = NULL; + previousLastLineHeader = NULL; + + isInQuote = false; + isInComment = false; + isInStatement = false; + isInCase = false; + isInQuestion = false; + isInClassHeader = false; + isInClassHeaderTab = false; + isInHeader = false; + isInOperator = false; + 
isInTemplate = false; + isInConst = false; + isInConditional = false; + templateDepth = 0; + parenDepth=0; + blockTabCount = 0; + leadingWhiteSpaces = 0; + prevNonSpaceCh = '{'; + currentNonSpaceCh = '{'; + prevNonLegalCh = '{'; + currentNonLegalCh = '{'; + prevFinalLineSpaceTabCount = 0; + prevFinalLineTabCount = 0; + probationHeader = NULL; + backslashEndsPrevLine = false; + isInDefine = false; + isInDefineDefinition = false; + defineTabCount = 0; + } + + /** + * set indentation style to ANSI C/C++. + */ + void ASBeautifier::setCStyle() + { + isCStyle = true; + } + + /** + * set indentation style to Java / K&R. + */ + void ASBeautifier::setJavaStyle() + { + isCStyle = false; + } + + /** + * indent using one tab per indentation + */ + void ASBeautifier::setTabIndentation(int length, bool forceTabs) + { + indentString = "\t"; + indentLength = length; + shouldForceTabIndentation = forceTabs; + + if (!isMinimalConditinalIndentSet) + minConditionalIndent = indentLength * 2; + } + + /** + + * indent using a number of spaces per indentation. + * + * @param length number of spaces per indent. + */ + void ASBeautifier::setSpaceIndentation(int length) + { + indentString=string(length, ' '); + indentLength = length; + + if (!isMinimalConditinalIndentSet) + minConditionalIndent = indentLength * 2; + } + + /** + * set the maximum indentation between two lines in a multi-line statement. + * + * @param max maximum indentation length. + */ + void ASBeautifier::setMaxInStatementIndentLength(int max) + { + maxInStatementIndent = max; + } + + /** + * set the minimum indentation between two lines in a multi-line condition. + * + * @param min minimal indentation length. + */ + void ASBeautifier::setMinConditionalIndentLength(int min) + { + minConditionalIndent = min; + isMinimalConditinalIndentSet = true; + } + + /** + * set the state of the bracket indentation option. If true, brackets will + * be indented one additional indent. + * + * @param state state of option. 
+ */ + void ASBeautifier::setBracketIndent(bool state) + { + bracketIndent = state; + } + + /** + * set the state of the block indentation option. If true, entire blocks + * will be indented one additional indent, similar to the GNU indent style. + * + * @param state state of option. + */ + void ASBeautifier::setBlockIndent(bool state) + { + if (state) + setBracketIndent(false); // so that we don't have both bracket and block indent + blockIndent = state; + } + + /** + * set the state of the class indentation option. If true, C++ class + * definitions will be indented one additional indent. + * + * @param state state of option. + */ + void ASBeautifier::setClassIndent(bool state) + { + classIndent = state; + } + + /** + * set the state of the switch indentation option. If true, blocks of 'switch' + * statements will be indented one additional indent. + * + * @param state state of option. + */ + void ASBeautifier::setSwitchIndent(bool state) + { + switchIndent = state; + } + + /** + * set the state of the case indentation option. If true, lines of 'case' + * statements will be indented one additional indent. + * + * @param state state of option. + */ + void ASBeautifier::setCaseIndent(bool state) + { + caseIndent = state; + } + /** + * set the state of the namespace indentation option. + * If true, blocks of 'namespace' statements will be indented one + * additional indent. Otherwise, NO indentation will be added. + * + * @param state state of option. + */ + void ASBeautifier::setNamespaceIndent(bool state) + { + namespaceIndent = state; + } + + /** + * set the state of the label indentation option. + * If true, labels will be indented one indent LESS than the + * current indentation level. + * If false, labels will be flushed to the left with NO + * indent at all. + * + * @param state state of option. + */ + void ASBeautifier::setLabelIndent(bool state) + { + labelIndent = state; + } + + /** + * set the state of the preprocessor indentation option. 
+ * If true, multiline #define statements will be indented. + * + * @param state state of option. + */ + void ASBeautifier::setPreprocessorIndent(bool state) + { + preprocessorIndent = state; + } + + /** + * set the state of the empty line fill option. + * If true, empty lines will be filled with the whitespace. + * of their previous lines. + * If false, these lines will remain empty. + * + * @param state state of option. + */ + void ASBeautifier::setEmptyLineFill(bool state) + { + emptyLineFill = state; + } + + /** + * check if there are any indented lines ready to be read by nextLine() + * + * @return are there any indented lines ready? + */ + bool ASBeautifier::hasMoreLines() const + { + return sourceIterator->hasMoreLines(); + } + + /** + * get the next indented line. + * + * @return indented line. + */ + string ASBeautifier::nextLine() + { + return beautify(sourceIterator->nextLine()); + } + + /** + * beautify a line of source code. + * every line of source code in a source code file should be sent + * one after the other to the beautify method. + * + * @return the indented line. + * @param originalLine the original unindented line. 
+ */ + string ASBeautifier::beautify(const string &originalLine) + { + string line; + bool isInLineComment = false; + bool lineStartsInComment = false; + bool isInClass = false; + bool isInSwitch = false; + bool isImmediatelyAfterConst = false; + bool isSpecialChar = false; + + char ch = ' '; + char prevCh; + string outBuffer; // the newly idented line is bufferd here + int tabCount = 0; + const string *lastLineHeader = NULL; + bool closingBracketReached = false; + int spaceTabCount = 0; + char tempCh; + unsigned int headerStackSize = headerStack->size(); + //bool isLineInStatement = isInStatement; + bool shouldIndentBrackettedLine = true; + int lineOpeningBlocksNum = 0; + int lineClosingBlocksNum = 0; + bool previousLineProbation = (probationHeader != NULL); + unsigned int i; + + currentHeader = NULL; + + lineStartsInComment = isInComment; + + // handle and remove white spaces around the line: + // If not in comment, first find out size of white space before line, + // so that possible comments starting in the line continue in + // relation to the preliminary white-space. + if (!isInComment) + { + leadingWhiteSpaces = 0; + while (leadingWhiteSpaces<originalLine.length() && originalLine[leadingWhiteSpaces] <= 0x20) + leadingWhiteSpaces++; + + line = trim(originalLine); + } + else + { + unsigned int trimSize; + for (trimSize=0; + trimSize < originalLine.length() && trimSize<leadingWhiteSpaces && originalLine[trimSize] <= 0x20 ; + trimSize++) + ; + line = originalLine.substr(trimSize); + } + + + if (line.length() == 0) + { + if (emptyLineFill) + return preLineWS(prevFinalLineSpaceTabCount, prevFinalLineTabCount); + else + return line; + } + + // handle preprocessor commands + + if (isCStyle && !isInComment && (line[0] == '#' || backslashEndsPrevLine)) + { + if (line[0] == '#') + { + string preproc = trim(string(line.c_str() + 1)); + + + // When finding a multi-lined #define statement, the original beautifier + // 1. sets its isInDefineDefinition flag + // 2. 
clones a new beautifier that will be used for the actual indentation + // of the #define. This clone is put into the activeBeautifierStack in order + // to be called for the actual indentation. + // The original beautifier will have isInDefineDefinition = true, isInDefine = false + // The cloned beautifier will have isInDefineDefinition = true, isInDefine = true + if (preprocessorIndent && preproc.COMPARE(0, 6, string("define")) == 0 && line[line.length() - 1] == '\\') + { + if (!isInDefineDefinition) + { + ASBeautifier *defineBeautifier; + + // this is the original beautifier + isInDefineDefinition = true; + + // push a new beautifier into the active stack + // this breautifier will be used for the indentation of this define + defineBeautifier = new ASBeautifier(*this); + //defineBeautifier->init(); + //defineBeautifier->isInDefineDefinition = true; + //defineBeautifier->beautify(""); + activeBeautifierStack->push_back(defineBeautifier); + } + else + { + // the is the cloned beautifier that is in charge of indenting the #define. + isInDefine = true; + } + } + else if (preproc.COMPARE(0, 2, string("if")) == 0) + { + // push a new beautifier into the stack + waitingBeautifierStackLengthStack->push_back(waitingBeautifierStack->size()); + activeBeautifierStackLengthStack->push_back(activeBeautifierStack->size()); + waitingBeautifierStack->push_back(new ASBeautifier(*this)); + } + else if (preproc.COMPARE(0, 4/*2*/, string("else")) == 0) + { + if (!waitingBeautifierStack->empty()) + { + // MOVE current waiting beautifier to active stack. + activeBeautifierStack->push_back(waitingBeautifierStack->back()); + waitingBeautifierStack->pop_back(); + } + } + else if (preproc.COMPARE(0, 4, string("elif")) == 0) + { + if (!waitingBeautifierStack->empty()) + { + // append a COPY current waiting beautifier to active stack, WITHOUT deleting the original. 
+ activeBeautifierStack->push_back( new ASBeautifier( *(waitingBeautifierStack->back()) ) ); + } + } + else if (preproc.COMPARE(0, 5, string("endif")) == 0) + { + unsigned int stackLength; + ASBeautifier *beautifier; + + if (!waitingBeautifierStackLengthStack->empty()) + { + stackLength = waitingBeautifierStackLengthStack->back(); + waitingBeautifierStackLengthStack->pop_back(); + while (waitingBeautifierStack->size() > stackLength) + { + beautifier = waitingBeautifierStack->back(); + waitingBeautifierStack->pop_back(); + delete beautifier; + } + } + + if (!activeBeautifierStackLengthStack->empty()) + { + stackLength = activeBeautifierStackLengthStack->back(); + activeBeautifierStackLengthStack->pop_back(); + while (activeBeautifierStack->size() > stackLength) + { + beautifier = activeBeautifierStack->back(); + activeBeautifierStack->pop_back(); + delete beautifier; + } + } + + + } + } + + // check if the last char is a backslash + if(line.length() > 0) + backslashEndsPrevLine = (line[line.length() - 1] == '\\'); + else + backslashEndsPrevLine = false; + + // check if this line ends a multi-line #define + // if so, use the #define's cloned beautifier for the line's indentation + // and then remove it from the active beautifier stack and delete it. + if (!backslashEndsPrevLine && isInDefineDefinition && !isInDefine) + { + string beautifiedLine; + ASBeautifier *defineBeautifier; + + isInDefineDefinition = false; + defineBeautifier = activeBeautifierStack->back(); + activeBeautifierStack->pop_back(); + + beautifiedLine = defineBeautifier->beautify(line); + delete defineBeautifier; + return beautifiedLine; + } + + // unless this is a multi-line #define, return this precompiler line as is. + if (!isInDefine && !isInDefineDefinition) + return originalLine; + } + + // if there exists any worker beautifier in the activeBeautifierStack, + // then use it instead of me to indent the current line. 
+ if (!isInDefine && activeBeautifierStack != NULL && !activeBeautifierStack->empty()) + { + return activeBeautifierStack->back()->beautify(line); + } + + // calculate preliminary indentation based on data from past lines + if (!inStatementIndentStack->empty()) + spaceTabCount = inStatementIndentStack->back(); + + + for (i=0; i<headerStackSize; i++) + { + isInClass = false; + + if (blockIndent || (!(i>0 && (*headerStack)[i-1] != &AS_OPEN_BRACKET + && (*headerStack)[i] == &AS_OPEN_BRACKET))) + ++tabCount; + + if (isCStyle && !namespaceIndent && i >= 1 + && (*headerStack)[i-1] == &AS_NAMESPACE + && (*headerStack)[i] == &AS_OPEN_BRACKET) + --tabCount; + + if (isCStyle && i >= 1 + && (*headerStack)[i-1] == &AS_CLASS + && (*headerStack)[i] == &AS_OPEN_BRACKET ) + { + if (classIndent) + ++tabCount; + isInClass = true; + } + + // is the switchIndent option is on, indent switch statements an additional indent. + else if (switchIndent && i > 1 && + (*headerStack)[i-1] == &AS_SWITCH && + (*headerStack)[i] == &AS_OPEN_BRACKET + ) + { + ++tabCount; + isInSwitch = true; + } + + } + + if (!lineStartsInComment + && isCStyle + && isInClass + && classIndent + && headerStackSize >= 2 + &&(*headerStack)[headerStackSize-2] == &AS_CLASS + && (*headerStack)[headerStackSize-1] == &AS_OPEN_BRACKET + && line[0] == '}') + --tabCount; + + else if (!lineStartsInComment + && isInSwitch + && switchIndent + && headerStackSize >= 2 + && (*headerStack)[headerStackSize-2] == &AS_SWITCH + && (*headerStack)[headerStackSize-1] == &AS_OPEN_BRACKET + && line[0] == '}') + --tabCount; + + if (isInClassHeader) + { + isInClassHeaderTab = true; + tabCount += 2; + } + + if (isInConditional) + { + --tabCount; + } + + + // parse characters in the current line. + + for (i=0; i<line.length(); i++) + { + tempCh = line[i]; + + prevCh = ch; + ch = tempCh; + + outBuffer.append(1, ch); + + if (isWhiteSpace(ch)) + continue; + + + // handle special characters (i.e. backslash+character such as \n, \t, ...) 
+ if (isSpecialChar) + { + isSpecialChar = false; + continue; + } + if (!(isInComment || isInLineComment) && line.COMPARE(i, 2, string("\\\\")) == 0) + { + outBuffer.append(1, '\\'); + i++; + continue; + } + if (!(isInComment || isInLineComment) && ch=='\\') + { + isSpecialChar = true; + continue; + } + + // handle quotes (such as 'x' and "Hello Dolly") + if (!(isInComment || isInLineComment) && (ch=='"' || ch=='\'')) + if (!isInQuote) + { + quoteChar = ch; + isInQuote = true; + } + else if (quoteChar == ch) + { + isInQuote = false; + isInStatement = true; + continue; + } + if (isInQuote) + continue; + + // handle comments + + if ( !(isInComment || isInLineComment) && line.COMPARE(i, 2, AS_OPEN_LINE_COMMENT) == 0 ) + { + isInLineComment = true; + outBuffer.append(1, '/'); + i++; + continue; + } + else if ( !(isInComment || isInLineComment) && line.COMPARE(i, 2, AS_OPEN_COMMENT) == 0 ) + { + isInComment = true; + outBuffer.append(1, '*'); + i++; + continue; + } + else if ( (isInComment || isInLineComment) && line.COMPARE(i, 2, AS_CLOSE_COMMENT) == 0 ) + { + isInComment = false; + outBuffer.append(1, '/'); + i++; + continue; + } + + if (isInComment||isInLineComment) + continue; + + // if we have reached this far then we are NOT in a comment or string of special character... + + if (probationHeader != NULL) + { + if ( ((probationHeader == &AS_STATIC || probationHeader == &AS_CONST) && ch == '{') + || (probationHeader == &AS_SYNCHRONIZED && ch == '(')) + { + // insert the probation header as a new header + isInHeader = true; + headerStack->push_back(probationHeader); + + // handle the specific probation header + isInConditional = (probationHeader == &AS_SYNCHRONIZED); + if (probationHeader == &AS_CONST) + isImmediatelyAfterConst = true; + // isInConst = true; + /* TODO: + * There is actually no more need for the global isInConst variable. + * The only reason for checking const is to see if there is a const + * immediately before an open-bracket. 
+ * Since CONST is now put into probation and is checked during itspost-char, + * isImmediatelyAfterConst can be set by its own... + */ + + isInStatement = false; + // if the probation comes from the previous line, then indent by 1 tab count. + if (previousLineProbation && ch == '{') + tabCount++; + previousLineProbation = false; + } + + // dismiss the probation header + probationHeader = NULL; + } + + prevNonSpaceCh = currentNonSpaceCh; + currentNonSpaceCh = ch; + if (!isLegalNameChar(ch) && ch != ',' && ch != ';' ) + { + prevNonLegalCh = currentNonLegalCh; + currentNonLegalCh = ch; + } + + //if (isInConst) + //{ + // isInConst = false; + // isImmediatelyAfterConst = true; + //} + + if (isInHeader) + { + isInHeader = false; + currentHeader = headerStack->back(); + } + else + currentHeader = NULL; + + if (isCStyle && isInTemplate + && (ch == '<' || ch == '>') + && findHeader(line, i, nonAssignmentOperators) == NULL) //; + { + if (ch == '<') + { + ++templateDepth; + } + else if (ch == '>') + { + if (--templateDepth <= 0) + { + if (isInTemplate) + ch = ';'; + else + ch = 't'; + isInTemplate = false; + templateDepth = 0; + } + + } + } + + // handle parenthesies + if (ch == '(' || ch == '[' || ch == ')' || ch == ']') + { + if (ch == '(' || ch == '[') + { + if (parenDepth == 0) + { + parenStatementStack->push_back(isInStatement); + isInStatement = true; + } + parenDepth++; + + inStatementIndentStackSizeStack->push_back(inStatementIndentStack->size()); + + if (currentHeader != NULL) + registerInStatementIndent(line, i, spaceTabCount, minConditionalIndent/*indentLength*2*/, true); + else + registerInStatementIndent(line, i, spaceTabCount, 0, true); + } + else if (ch == ')' || ch == ']') + { + parenDepth--; + if (parenDepth == 0) + { + isInStatement = parenStatementStack->back(); + parenStatementStack->pop_back(); + ch = ' '; + + isInConditional = false; + } + + if (!inStatementIndentStackSizeStack->empty()) + { + unsigned int previousIndentStackSize = 
inStatementIndentStackSizeStack->back(); + inStatementIndentStackSizeStack->pop_back(); + while (previousIndentStackSize < inStatementIndentStack->size()) + inStatementIndentStack->pop_back(); + + if (!parenIndentStack->empty()) + { + int poppedIndent = parenIndentStack->back(); + parenIndentStack->pop_back(); + + if (i == 0) + spaceTabCount = poppedIndent; + } + } + } + + continue; + } + + + if (ch == '{') + { + bool isBlockOpener = false; + + // first, check if '{' is a block-opener or an static-array opener + isBlockOpener = ( (prevNonSpaceCh == '{' && bracketBlockStateStack->back()) + || prevNonSpaceCh == '}' + || prevNonSpaceCh == ')' + || prevNonSpaceCh == ';' + || isInClassHeader + || isBlockOpener + || isImmediatelyAfterConst + || (isInDefine && + (prevNonSpaceCh == '(' + || prevNonSpaceCh == '_' + || isalnum(prevNonSpaceCh))) ); + + isInClassHeader = false; + if (!isBlockOpener && currentHeader != NULL) + { + for (unsigned int n=0; n < nonParenHeaders.size(); n++) + if (currentHeader == nonParenHeaders[n]) + { + isBlockOpener = true; + break; + } + } + bracketBlockStateStack->push_back(isBlockOpener); + if (!isBlockOpener) + { + inStatementIndentStackSizeStack->push_back(inStatementIndentStack->size()); + registerInStatementIndent(line, i, spaceTabCount, 0, true); + parenDepth++; + if (i == 0) + shouldIndentBrackettedLine = false; + + continue; + } + + // this bracket is a block opener... + + ++lineOpeningBlocksNum; + + if (isInClassHeader) + isInClassHeader = false; + if (isInClassHeaderTab) + { + isInClassHeaderTab = false; + tabCount -= 2; + } + + blockParenDepthStack->push_back(parenDepth); + blockStatementStack->push_back(isInStatement); + + inStatementIndentStackSizeStack->push_back(inStatementIndentStack->size()); + + blockTabCount += isInStatement? 
1 : 0; + parenDepth = 0; + isInStatement = false; + + tempStacks->push_back(new vector<const string*>); + headerStack->push_back(&AS_OPEN_BRACKET); + lastLineHeader = &AS_OPEN_BRACKET; // <------ + + continue; + } + + //check if a header has been reached + if (prevCh == ' ') + { + bool isIndentableHeader = true; + const string *newHeader = findHeader(line, i, headers); + if (newHeader != NULL) + { + // if we reached here, then this is a header... + isInHeader = true; + + vector<const string*> *lastTempStack; + if (tempStacks->empty()) + lastTempStack = NULL; + else + lastTempStack = tempStacks->back(); + + // if a new block is opened, push a new stack into tempStacks to hold the + // future list of headers in the new block. + + // take care of the special case: 'else if (...)' + if (newHeader == &AS_IF && lastLineHeader == &AS_ELSE) + { + //spaceTabCount += indentLength; // to counter the opposite addition that occurs when the 'if' is registered below... + headerStack->pop_back(); + } + + // take care of 'else' + else if (newHeader == &AS_ELSE) + { + if (lastTempStack != NULL) + { + int indexOfIf = indexOf(*lastTempStack, &AS_IF); // <--- + if (indexOfIf != -1) + { + // recreate the header list in headerStack up to the previous 'if' + // from the temporary snapshot stored in lastTempStack. + int restackSize = lastTempStack->size() - indexOfIf - 1; + for (int r=0; r<restackSize; r++) + { + headerStack->push_back(lastTempStack->back()); + lastTempStack->pop_back(); + } + if (!closingBracketReached) + tabCount += restackSize; + } + /* + * If the above if is not true, i.e. no 'if' before the 'else', + * then nothing beautiful will come out of this... + * I should think about inserting an Exception here to notify the caller of this... 
+ */ + } + } + + // check if 'while' closes a previous 'do' + else if (newHeader == &AS_WHILE) + { + if (lastTempStack != NULL) + { + int indexOfDo = indexOf(*lastTempStack, &AS_DO); // <--- + if (indexOfDo != -1) + { + // recreate the header list in headerStack up to the previous 'do' + // from the temporary snapshot stored in lastTempStack. + int restackSize = lastTempStack->size() - indexOfDo - 1; + for (int r=0; r<restackSize; r++) + { + headerStack->push_back(lastTempStack->back()); + lastTempStack->pop_back(); + } + if (!closingBracketReached) + tabCount += restackSize; + } + } + } + // check if 'catch' closes a previous 'try' or 'catch' + else if (newHeader == &AS_CATCH || newHeader == &AS_FINALLY) + { + if (lastTempStack != NULL) + { + int indexOfTry = indexOf(*lastTempStack, &AS_TRY); + if (indexOfTry == -1) + indexOfTry = indexOf(*lastTempStack, &AS_CATCH); + if (indexOfTry != -1) + { + // recreate the header list in headerStack up to the previous 'try' + // from the temporary snapshot stored in lastTempStack. 
+ int restackSize = lastTempStack->size() - indexOfTry - 1; + for (int r=0; r<restackSize; r++) + { + headerStack->push_back(lastTempStack->back()); + lastTempStack->pop_back(); + } + + if (!closingBracketReached) + tabCount += restackSize; + } + } + } + else if (newHeader == &AS_CASE) + { + isInCase = true; + if (!caseIndent) + --tabCount; + } + else if(newHeader == &AS_DEFAULT) + { + isInCase = true; + if (!caseIndent) + --tabCount; + } + else if (newHeader == &AS_PUBLIC || newHeader == &AS_PROTECTED || newHeader == &AS_PRIVATE) + { + if (isCStyle && !isInClassHeader) + --tabCount; + isIndentableHeader = false; + } + //else if ((newHeader == &STATIC || newHeader == &SYNCHRONIZED) && + // !headerStack->empty() && + // (headerStack->back() == &STATIC || headerStack->back() == &SYNCHRONIZED)) + //{ + // isIndentableHeader = false; + //} + else if (newHeader == &AS_STATIC + || newHeader == &AS_SYNCHRONIZED + || (newHeader == &AS_CONST && isCStyle)) + { + if (!headerStack->empty() && + (headerStack->back() == &AS_STATIC + || headerStack->back() == &AS_SYNCHRONIZED + || headerStack->back() == &AS_CONST)) + { + isIndentableHeader = false; + } + else + { + isIndentableHeader = false; + probationHeader = newHeader; + } + } + else if (newHeader == &AS_CONST) + { + // this will be entered only if NOT in C style + // since otherwise the CONST would be found to be a probstion header... 
+ + //if (isCStyle) + // isInConst = true; + isIndentableHeader = false; + } + /* + else if (newHeader == &OPERATOR) + { + if (isCStyle) + isInOperator = true; + isIndentableHeader = false; + } + */ + else if (newHeader == &AS_TEMPLATE) + { + if (isCStyle) + isInTemplate = true; + isIndentableHeader = false; + } + + + if (isIndentableHeader) + { + // 3.2.99 + //spaceTabCount-=indentLength; + headerStack->push_back(newHeader); + isInStatement = false; + if (indexOf(nonParenHeaders, newHeader) == -1) + { + isInConditional = true; + } + lastLineHeader = newHeader; + } + else + isInHeader = false; + + //lastLineHeader = newHeader; + + outBuffer.append(newHeader->substr(1)); + i += newHeader->length() - 1; + + continue; + } + } + + if (isCStyle && !isalpha(prevCh) + && line.COMPARE(i, 8, AS_OPERATOR) == 0 && !isalnum(line[i+8])) + { + isInOperator = true; + outBuffer.append(AS_OPERATOR.substr(1)); + i += 7; + continue; + } + + if (ch == '?') + isInQuestion = true; + + + // special handling of 'case' statements + if (ch == ':') + { + if (line.length() > i+1 && line[i+1] == ':') // look for :: + { + ++i; + outBuffer.append(1, ':'); + ch = ' '; + continue; + } + + else if (isCStyle && isInClass && prevNonSpaceCh != ')') + { + // BEGIN Content of ASBeautifier.cpp.BITFIELD.patch: + + unsigned int chIndex; + char nextCh = 0; + for (chIndex = i+1; chIndex < line.length(); chIndex++) + if (!isWhiteSpace(line[chIndex])) + break; + if (chIndex< line.length()) + nextCh = line[chIndex]; + int nWord =0; + for (chIndex = 0; chIndex < i; chIndex++) + { + if (!isWhiteSpace(line[chIndex])) + { + nWord ++; + while (!isWhiteSpace(line[++chIndex])); + } + } + if ((nextCh >= '0' && nextCh <= '9') || (nWord >1)) + continue; + // END Content of ASBeautifier.cpp.BITFIELD.patch: + + --tabCount; + // found a 'private:' or 'public:' inside a class definition + // so do nothing special + } + + else if (isCStyle && isInClassHeader) + { + + // found a 'class A : public B' definition + // so do 
nothing special + } + + else if (isInQuestion) + { + isInQuestion = false; + } + else if (isCStyle && prevNonSpaceCh == ')') + { + isInClassHeader = true; + if (i==0) + tabCount += 2; + } + else + { + currentNonSpaceCh = ';'; // so that brackets after the ':' will appear as block-openers + if (isInCase) + { + isInCase = false; + ch = ';'; // from here on, treat char as ';' + } + // BEGIN content of ASBeautifier.cpp.BITFIELD.patch.bz2 + else // bitfield or labels + { + unsigned int chIndex; + char nextCh = 0; + for (chIndex = i+1; (isCStyle && chIndex < line.length()); chIndex++) + if (!isWhiteSpace(line[chIndex])) + break; + if (chIndex< line.length()) + nextCh = line[chIndex]; + + int nWord =0; + for (chIndex = 0; chIndex < i; chIndex++) + { + if (!isWhiteSpace(line[chIndex])) + { + nWord ++; + while (!isWhiteSpace(line[++chIndex])); + } + } + if (isCStyle && (nextCh >= '0' && nextCh <= '9') || (nWord >1)) + { + continue; + } + // END content of ASASBeautifier.cpp.BITFIELD.patch.bz2 + + else // is in a label (e.g. 'label1:') + { + if (labelIndent) + --tabCount; // unindent label by one indent + else + tabCount = 0; // completely flush indent to left + } + + // BEGIN content of ASASBeautifier.cpp.BITFIELD.patch.bz2 + } + // END content of ASASBeautifier.cpp.BITFIELD.patch.bz2 + + } + } + + if ((ch == ';' || (parenDepth>0 && ch == ',')) && !inStatementIndentStackSizeStack->empty()) + while ((unsigned int)inStatementIndentStackSizeStack->back() + (parenDepth>0 ? 1 : 0) < inStatementIndentStack->size()) + inStatementIndentStack->pop_back(); + + + // handle ends of statements + if ( (ch == ';' && parenDepth == 0) || ch == '}'/* || (ch == ',' && parenDepth == 0)*/) + { + if (ch == '}') + { + // first check if this '}' closes a previous block, or a static array... 
+ if (!bracketBlockStateStack->empty()) + { + bool bracketBlockState = bracketBlockStateStack->back(); + bracketBlockStateStack->pop_back(); + if (!bracketBlockState) + { + if (!inStatementIndentStackSizeStack->empty()) + { + // this bracket is a static array + + unsigned int previousIndentStackSize = inStatementIndentStackSizeStack->back(); + inStatementIndentStackSizeStack->pop_back(); + while (previousIndentStackSize < inStatementIndentStack->size()) + inStatementIndentStack->pop_back(); + parenDepth--; + if (i == 0) + shouldIndentBrackettedLine = false; + + if (!parenIndentStack->empty()) + { + int poppedIndent = parenIndentStack->back(); + parenIndentStack->pop_back(); + if (i == 0) + spaceTabCount = poppedIndent; + } + } + continue; + } + } + + // this bracket is block closer... + + ++lineClosingBlocksNum; + + if(!inStatementIndentStackSizeStack->empty()) + inStatementIndentStackSizeStack->pop_back(); + + if (!blockParenDepthStack->empty()) + { + parenDepth = blockParenDepthStack->back(); + blockParenDepthStack->pop_back(); + isInStatement = blockStatementStack->back(); + blockStatementStack->pop_back(); + + if (isInStatement) + blockTabCount--; + } + + closingBracketReached = true; + int headerPlace = indexOf(*headerStack, &AS_OPEN_BRACKET); // <--- + if (headerPlace != -1) + { + const string *popped = headerStack->back(); + while (popped != &AS_OPEN_BRACKET) + { + headerStack->pop_back(); + popped = headerStack->back(); + } + headerStack->pop_back(); + + if (!tempStacks->empty()) + { + vector<const string*> *temp = tempStacks->back(); + tempStacks->pop_back(); + delete temp; + } + } + + + ch = ' '; // needed due to cases such as '}else{', so that headers ('else' tn tih case) will be identified... + } + + /* + * Create a temporary snapshot of the current block's header-list in the + * uppermost inner stack in tempStacks, and clear the headerStack up to + * the begining of the block. 
+ * Thus, the next future statement will think it comes one indent past + * the block's '{' unless it specifically checks for a companion-header + * (such as a previous 'if' for an 'else' header) within the tempStacks, + * and recreates the temporary snapshot by manipulating the tempStacks. + */ + if (!tempStacks->back()->empty()) + while (!tempStacks->back()->empty()) + tempStacks->back()->pop_back(); + while (!headerStack->empty() && headerStack->back() != &AS_OPEN_BRACKET) + { + tempStacks->back()->push_back(headerStack->back()); + headerStack->pop_back(); + } + + if (parenDepth == 0 && ch == ';') + isInStatement=false; + + isInClassHeader = false; + + continue; + } + + + // check for preBlockStatements ONLY if not within parenthesies + // (otherwise 'struct XXX' statements would be wrongly interpreted...) + if (prevCh == ' ' && !isInTemplate && parenDepth == 0) + { + const string *newHeader = findHeader(line, i, preBlockStatements); + if (newHeader != NULL) + { + isInClassHeader = true; + outBuffer.append(newHeader->substr(1)); + i += newHeader->length() - 1; + //if (isCStyle) + headerStack->push_back(newHeader); + } + } + + // Handle operators + // + + //// // PRECHECK if a '==' or '--' or '++' operator was reached. + //// // If not, then register an indent IF an assignment operator was reached. + //// // The precheck is important, so that statements such as 'i--==2' are not recognized + //// // to have assignment operators (here, '-=') in them . . . + + const string *foundAssignmentOp = NULL; + const string *foundNonAssignmentOp = NULL; + + immediatelyPreviousAssignmentOp = NULL; + + // Check if an operator has been reached. + foundAssignmentOp = findHeader(line, i, assignmentOperators, false); + foundNonAssignmentOp = findHeader(line, i, nonAssignmentOperators, false); + + // Since findHeader's boundry checking was not used above, it is possible + // that both an assignment op and a non-assignment op where found, + // e.g. '>>' and '>>='. 
If this is the case, treat the LONGER one as the + // found operator. + if (foundAssignmentOp != NULL && foundNonAssignmentOp != NULL) + if (foundAssignmentOp->length() < foundNonAssignmentOp->length()) + foundAssignmentOp = NULL; + else + foundNonAssignmentOp = NULL; + + if (foundNonAssignmentOp != NULL) + { + if (foundNonAssignmentOp->length() > 1) + { + outBuffer.append(foundNonAssignmentOp->substr(1)); + i += foundNonAssignmentOp->length() - 1; + } + } + + else if (foundAssignmentOp != NULL) + + { + if (foundAssignmentOp->length() > 1) + { + outBuffer.append(foundAssignmentOp->substr(1)); + i += foundAssignmentOp->length() - 1; + } + + if (!isInOperator && !isInTemplate) + { + registerInStatementIndent(line, i, spaceTabCount, 0, false); + immediatelyPreviousAssignmentOp = foundAssignmentOp; + isInStatement = true; + } + } + + /* + immediatelyPreviousAssignmentOp = NULL; + bool isNonAssingmentOperator = false; + for (int n = 0; n < nonAssignmentOperators.size(); n++) + if (line.COMPARE(i, nonAssignmentOperators[n]->length(), *(nonAssignmentOperators[n])) == 0) + { + if (nonAssignmentOperators[n]->length() > 1) + { + outBuffer.append(nonAssignmentOperators[n]->substr(1)); + i += nonAssignmentOperators[n]->length() - 1; + } + isNonAssingmentOperator = true; + break; + } + if (!isNonAssingmentOperator) + { + for (int a = 0; a < assignmentOperators.size(); a++) + if (line.COMPARE(i, assignmentOperators[a]->length(), *(assignmentOperators[a])) == 0) + { + if (assignmentOperators[a]->length() > 1) + { + outBuffer.append(assignmentOperators[a]->substr(1)); + i += assignmentOperators[a]->length() - 1; + } + + if (!isInOperator && !isInTemplate) + { + registerInStatementIndent(line, i, spaceTabCount, 0, false); + immediatelyPreviousAssignmentOp = assignmentOperators[a]; + isInStatement = true; + } + break; + } + } + */ + + if (isInOperator) + isInOperator = false; + } + + // handle special cases of unindentation: + + /* + * if '{' doesn't follow an immediately previous 
'{' in the headerStack + * (but rather another header such as "for" or "if", then unindent it + * by one indentation relative to its block. + */ + // cerr << endl << lineOpeningBlocksNum << " " << lineClosingBlocksNum << " " << previousLastLineHeader << endl; + + // indent #define lines with one less tab + //if (isInDefine) + // tabCount -= defineTabCount-1; + + + if (!lineStartsInComment + && !blockIndent + && outBuffer.length()>0 + && outBuffer[0]=='{' + && !(lineOpeningBlocksNum > 0 && lineOpeningBlocksNum == lineClosingBlocksNum) + && !(headerStack->size() > 1 && (*headerStack)[headerStack->size()-2] == &AS_OPEN_BRACKET) + && shouldIndentBrackettedLine) + --tabCount; + + else if (!lineStartsInComment + && outBuffer.length()>0 + && outBuffer[0]=='}' + && shouldIndentBrackettedLine ) + --tabCount; + + // correctly indent one-line-blocks... + else if (!lineStartsInComment + && outBuffer.length()>0 + && lineOpeningBlocksNum > 0 + && lineOpeningBlocksNum == lineClosingBlocksNum + && previousLastLineHeader != NULL + && previousLastLineHeader != &AS_OPEN_BRACKET) + tabCount -= 1; //lineOpeningBlocksNum - (blockIndent ? 1 : 0); + + if (tabCount < 0) + tabCount = 0; + + // take care of extra bracket indentatation option... 
+ if (bracketIndent && outBuffer.length()>0 && shouldIndentBrackettedLine) + if (outBuffer[0]=='{' || outBuffer[0]=='}') + tabCount++; + + + if (isInDefine) + { + if (outBuffer[0] == '#') + { + string preproc = trim(string(outBuffer.c_str() + 1)); + if (preproc.COMPARE(0, 6, string("define")) == 0) + { + if (!inStatementIndentStack->empty() + && inStatementIndentStack->back() > 0) + { + defineTabCount = tabCount; + } + else + { + defineTabCount = tabCount - 1; + tabCount--; + } + } + } + + tabCount -= defineTabCount; + } + + if (tabCount < 0) + tabCount = 0; + + + // finally, insert indentations into begining of line + + prevFinalLineSpaceTabCount = spaceTabCount; + prevFinalLineTabCount = tabCount; + + if (shouldForceTabIndentation) + { + tabCount += spaceTabCount / indentLength; + spaceTabCount = spaceTabCount % indentLength; + } + + outBuffer = preLineWS(spaceTabCount,tabCount) + outBuffer; + + if (lastLineHeader != NULL) + previousLastLineHeader = lastLineHeader; + + return outBuffer; + } + + + string ASBeautifier::preLineWS(int spaceTabCount, int tabCount) + { + string ws; + + for (int i=0; i<tabCount; i++) + ws += indentString; + + while ((spaceTabCount--) > 0) + ws += string(" "); + + return ws; + + } + + /** + * register an in-statement indent. 
+ */ + void ASBeautifier::registerInStatementIndent(const string &line, int i, int spaceTabCount, + int minIndent, bool updateParenStack) + { + int inStatementIndent; + int remainingCharNum = line.length() - i; + int nextNonWSChar = 1; + + nextNonWSChar = getNextProgramCharDistance(line, i); + + // if indent is around the last char in the line, indent instead 2 spaces from the previous indent + if (nextNonWSChar == remainingCharNum) + { + int previousIndent = spaceTabCount; + if (!inStatementIndentStack->empty()) + previousIndent = inStatementIndentStack->back(); + + inStatementIndentStack->push_back(/*2*/ indentLength + previousIndent ); + if (updateParenStack) + parenIndentStack->push_back( previousIndent ); + return; + } + + if (updateParenStack) + parenIndentStack->push_back(i+spaceTabCount); + + inStatementIndent = i + nextNonWSChar + spaceTabCount; + + if (i + nextNonWSChar < minIndent) + inStatementIndent = minIndent + spaceTabCount; + + if (i + nextNonWSChar > maxInStatementIndent) + inStatementIndent = indentLength*2 + spaceTabCount; + + + + if (!inStatementIndentStack->empty() && + inStatementIndent < inStatementIndentStack->back()) + inStatementIndent = inStatementIndentStack->back(); + + inStatementIndentStack->push_back(inStatementIndent); + } + + /** + * get distance to the next non-white sspace, non-comment character in the line. + * if no such character exists, return the length remaining to the end of the line. 
+ */ + int ASBeautifier::getNextProgramCharDistance(const string &line, int i) + { + bool inComment = false; + int remainingCharNum = line.length() - i; + int charDistance = 1; + int ch; + + for (charDistance = 1; charDistance < remainingCharNum; charDistance++) + { + ch = line[i + charDistance]; + if (inComment) + { + if (line.COMPARE(i + charDistance, 2, AS_CLOSE_COMMENT) == 0) + { + charDistance++; + inComment = false; + } + continue; + } + else if (isWhiteSpace(ch)) + continue; + else if (ch == '/') + { + if (line.COMPARE(i + charDistance, 2, AS_OPEN_LINE_COMMENT) == 0) + return remainingCharNum; + else if (line.COMPARE(i + charDistance, 2, AS_OPEN_COMMENT) == 0) + { + charDistance++; + inComment = true; + } + } + else + return charDistance; + } + + return charDistance; + } + + + /** + * check if a specific character can be used in a legal variable/method/class name + * + * @return legality of the char. + * @param ch the character to be checked. + */ + bool ASBeautifier::isLegalNameChar(char ch) const + { + return (isalnum(ch) //(ch>='a' && ch<='z') || (ch>='A' && ch<='Z') || (ch>='0' && ch<='9') || + || ch=='.' || ch=='_' || (!isCStyle && ch=='$') || (isCStyle && ch=='~')); + } + + + /** + * check if a specific line position contains a header, out of several possible headers. + * + * @return a pointer to the found header. if no header was found then return NULL. + */ + const string *ASBeautifier::findHeader(const string &line, int i, const vector<const string*> &possibleHeaders, bool checkBoundry) + { + int maxHeaders = possibleHeaders.size(); + const string *header = NULL; + int p; + + for (p=0; p < maxHeaders; p++) + { + header = possibleHeaders[p]; + + if (line.COMPARE(i, header->length(), *header) == 0) + { + // check that this is a header and not a part of a longer word + // (e.g. not at its begining, not at its middle...) 
+ + int lineLength = line.length(); + int headerEnd = i + header->length(); + char startCh = (*header)[0]; // first char of header + char endCh = 0; // char just after header + char prevCh = 0; // char just before header + + if (headerEnd < lineLength) + { + endCh = line[headerEnd]; + } + if (i > 0) + { + prevCh = line[i-1]; + } + + if (!checkBoundry) + { + return header; + } + else if (prevCh != 0 + && isLegalNameChar(startCh) + && isLegalNameChar(prevCh)) + { + return NULL; + } + else if (headerEnd >= lineLength + || !isLegalNameChar(startCh) + || !isLegalNameChar(endCh)) + { + return header; + } + else + { + return NULL; + } + } + } + + return NULL; + } + + + /** + * check if a specific character can be used in a legal variable/method/class name + * + * @return legality of the char. + * @param ch the character to be checked. + */ + bool ASBeautifier::isWhiteSpace(char ch) const + { + return (ch == ' ' || ch == '\t'); + } + + /** + * find the index number of a string element in a container of strings + * + * @return the index number of element in the ocntainer. -1 if element not found. + * @param container a vector of strings. + * @param element the element to find . + */ + int ASBeautifier::indexOf(vector<const string*> &container, const string *element) + { + vector<const string*>::const_iterator where; + + where= find(container.begin(), container.end(), element); + if (where == container.end()) + return -1; + else + return where - container.begin(); + } + + /** + * trim removes the white space surrounding a line. + * + * @return the trimmed line. + * @param str the line to trim. 
+ */ + string ASBeautifier::trim(const string &str) + { + + int start = 0; + int end = str.length() - 1; + + while (start < end && isWhiteSpace(str[start])) + start++; + + while (start <= end && isWhiteSpace(str[end])) + end--; + + string returnStr(str, start, end+1-start); + return returnStr; + } + +#ifdef USES_NAMESPACE +} +#endif +/* + * Copyright (c) 1998,1999,2000,2001,2002 Tal Davidson. All rights reserved. + * + * compiler_defines.h (1 January 1999) + * by Tal Davidson (davidsont@bigfoot.com) + * This file is a part of "Artistic Style" - an indentater and reformatter + * of C, C++, C# and Java source files. + * + * The "Artistic Style" project, including all files needed to compile it, + * is free software; you can redistribute it and/or use it and/or modify it + * under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, + * or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU General Public + * License along with this program. + */ + + +#ifndef ASBEAUTIFIER_H +#define ASBEAUTIFIER_H + +#include "ASResource.h" +#include "compiler_defines.h" +#include "ASSourceIterator.h" + +#include <string> +#include <vector> + + +using namespace std; + +namespace astyle + { + + enum BracketMode { NONE_MODE, ATTACH_MODE, BREAK_MODE, BDAC_MODE }; + enum BracketType { NULL_TYPE = 0, + DEFINITION_TYPE = 1, + COMMAND_TYPE = 2, + ARRAY_TYPE = 4, + SINGLE_LINE_TYPE = 8}; + + + class ASBeautifier : protected ASResource + { + public: + ASBeautifier(); + virtual ~ASBeautifier(); + virtual void init(ASSourceIterator* iter); // pointer to dynamically created iterator. 
+ virtual void init(); + virtual bool hasMoreLines() const; + virtual string nextLine(); + virtual string beautify(const string &line); + void setTabIndentation(int length = 4, bool forceTabs = false); + void setSpaceIndentation(int length = 4); + void setMaxInStatementIndentLength(int max); + void setMinConditionalIndentLength(int min); + void setClassIndent(bool state); + void setSwitchIndent(bool state); + void setCaseIndent(bool state); + void setBracketIndent(bool state); + void setBlockIndent(bool state); + void setNamespaceIndent(bool state); + void setLabelIndent(bool state); + void setCStyle(); + void setJavaStyle(); + void setEmptyLineFill(bool state); + void setPreprocessorIndent(bool state); + + + protected: + int getNextProgramCharDistance(const string &line, int i); + bool isLegalNameChar(char ch) const; + bool isWhiteSpace(char ch) const; + const string *findHeader(const string &line, int i, + const vector<const string*> &possibleHeaders, + bool checkBoundry = true); + string trim(const string &str); + int indexOf(vector<const string*> &container, const string *element); + + private: + ASBeautifier(const ASBeautifier ©); + void operator=(ASBeautifier&); // not to be implemented + + void initStatic(); + void registerInStatementIndent(const string &line, int i, int spaceTabCount, + int minIndent, bool updateParenStack); + string preLineWS(int spaceTabCount, int tabCount); + + static vector<const string*> headers; + static vector<const string*> nonParenHeaders; + static vector<const string*> preprocessorHeaders; + static vector<const string*> preBlockStatements; + static vector<const string*> assignmentOperators; + static vector<const string*> nonAssignmentOperators; + + static bool calledInitStatic; + + ASSourceIterator *sourceIterator; + vector<ASBeautifier*> *waitingBeautifierStack; + vector<ASBeautifier*> *activeBeautifierStack; + vector<int> *waitingBeautifierStackLengthStack; + vector<int> *activeBeautifierStackLengthStack; + vector<const string*> 
*headerStack; + vector< vector<const string*>* > *tempStacks; + vector<int> *blockParenDepthStack; + vector<bool> *blockStatementStack; + vector<bool> *parenStatementStack; + vector<int> *inStatementIndentStack; + vector<int> *inStatementIndentStackSizeStack; + vector<int> *parenIndentStack; + vector<bool> *bracketBlockStateStack; + string indentString; + const string *currentHeader; + const string *previousLastLineHeader; + const string *immediatelyPreviousAssignmentOp; + const string *probationHeader; + bool isInQuote; + bool isInComment; + bool isInCase; + bool isInQuestion; + bool isInStatement; + bool isInHeader; + bool isCStyle; + bool isInOperator; + bool isInTemplate; + bool isInConst; + bool isInDefine; + bool isInDefineDefinition; + bool classIndent; + bool isInClassHeader; + bool isInClassHeaderTab; + bool switchIndent; + bool caseIndent; + bool namespaceIndent; + bool bracketIndent; + bool blockIndent; + bool labelIndent; + bool preprocessorIndent; + bool isInConditional; + bool isMinimalConditinalIndentSet; + bool shouldForceTabIndentation; + int minConditionalIndent; + int parenDepth; + int indentLength; + int blockTabCount; + unsigned int leadingWhiteSpaces; + int maxInStatementIndent; + int templateDepth; + char quoteChar; + char prevNonSpaceCh; + char currentNonSpaceCh; + char currentNonLegalCh; + char prevNonLegalCh; + int prevFinalLineSpaceTabCount; + int prevFinalLineTabCount; + bool emptyLineFill; + bool backslashEndsPrevLine; + int defineTabCount; + }; +} + +#endif +/* + * Copyright (c) 1998,1999,2000,2001,2002 Tal Davidson. All rights reserved. + * + * ASFormatter.cpp + * by Tal Davidson (davidsont@bigfoot.com) + * This file is a part of "Artistic Style" - an indentater and reformatter + * of C, C++, C# and Java source files. 
+ * + * The "Artistic Style" project, including all files needed to compile it, + * is free software; you can redistribute it and/or use it and/or modify it + * under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, + * or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU General Public + * License along with this program. + * + * + * Patches: + * 26 November 1998 - Richard Bullington - + * A correction of line-breaking in headers following '}', + + * was created using a variation of a patch by Richard Bullington. + * 08 May 2004 + * applied ASFormatter450670.patch.bz2, ASFormatter.cpp.patch.bz2, + * patch1_ssvb_patch.tar.gz + */ + +#include "compiler_defines.h" +#include "ASFormatter.h" + + +#include <string> +#include <cctype> +#include <vector> +#include <algorithm> +#include <iostream> + + +#define INIT_CONTAINER(container, value) {if ( (container) != NULL ) delete (container); (container) = (value); } +#define DELETE_CONTAINER(container) {if ( (container) != NULL ) delete (container) ; } +#define IS_A(a,b) ( ((a) & (b)) == (b)) +#ifdef USES_NAMESPACE +using namespace std; + +namespace astyle + { +#endif + + + bool ASFormatter::calledInitStatic = false; + vector<const string*> ASFormatter::headers; + vector<const string*> ASFormatter::nonParenHeaders; + vector<const string*> ASFormatter::preprocessorHeaders; + vector<const string*> ASFormatter::preDefinitionHeaders; + vector<const string*> ASFormatter::preCommandHeaders; + vector<const string*> ASFormatter::operators; + vector<const string*> ASFormatter::assignmentOperators; + + + /** + * Constructor of ASFormatter + */ + ASFormatter::ASFormatter() + { + staticInit(); + + preBracketHeaderStack = NULL; + 
bracketTypeStack = NULL; + parenStack = NULL; + + sourceIterator = NULL; + bracketFormatMode = NONE_MODE; + shouldPadOperators = false; + shouldPadParenthesies = false; + shouldBreakOneLineBlocks = true; + shouldBreakOneLineStatements = true; + shouldConvertTabs = false; + shouldBreakBlocks = false; + shouldBreakClosingHeaderBlocks = false; + shouldBreakClosingHeaderBrackets = false; + shouldBreakElseIfs = false; + } + + /** + * Destructor of ASFormatter + */ + ASFormatter::~ASFormatter() + { + DELETE_CONTAINER( preBracketHeaderStack ); + } + + /** + * initialization of static data of ASFormatter. + */ + void ASFormatter::staticInit() + { + if (calledInitStatic) + return; + + calledInitStatic = true; + + headers.push_back(&AS_IF); + headers.push_back(&AS_ELSE); + headers.push_back(&AS_DO); + headers.push_back(&AS_WHILE); + headers.push_back(&AS_FOR); + headers.push_back(&AS_SYNCHRONIZED); + headers.push_back(&AS_TRY); + headers.push_back(&AS_CATCH); + headers.push_back(&AS_FINALLY); + headers.push_back(&AS_SWITCH); + headers.push_back(&AS_TEMPLATE); + headers.push_back(&AS_FOREACH); + headers.push_back(&AS_LOCK); + headers.push_back(&AS_UNSAFE); + headers.push_back(&AS_FIXED); + headers.push_back(&AS_GET); + headers.push_back(&AS_SET); + headers.push_back(&AS_ADD); + headers.push_back(&AS_REMOVE); + + nonParenHeaders.push_back(&AS_ELSE); + nonParenHeaders.push_back(&AS_DO); + nonParenHeaders.push_back(&AS_TRY); + nonParenHeaders.push_back(&AS_FINALLY); + nonParenHeaders.push_back(&AS_UNSAFE); + nonParenHeaders.push_back(&AS_GET); + nonParenHeaders.push_back(&AS_SET); + nonParenHeaders.push_back(&AS_ADD); + nonParenHeaders.push_back(&AS_REMOVE); + + // nonParenHeaders.push_back(&AS_TEMPLATE); + + preDefinitionHeaders.push_back(&AS_CLASS); + preDefinitionHeaders.push_back(&AS_INTERFACE); + preDefinitionHeaders.push_back(&AS_NAMESPACE); + preDefinitionHeaders.push_back(&AS_STRUCT); + + preCommandHeaders.push_back(&AS_EXTERN); + preCommandHeaders.push_back(&AS_THROWS); 
+ preCommandHeaders.push_back(&AS_CONST); + + preprocessorHeaders.push_back(&AS_BAR_DEFINE); + //// DEVEL: removed the folowing lines + ////preprocessorHeaders.push_back(&AS_BAR_INCLUDE); + ////preprocessorHeaders.push_back(&AS_BAR_IF); // #if or #ifdef + ////preprocessorHeaders.push_back(&AS_BAR_EL); // #else or #elif + ////preprocessorHeaders.push_back(&AS_BAR_ENDIF); + + operators.push_back(&AS_PLUS_ASSIGN); + operators.push_back(&AS_MINUS_ASSIGN); + operators.push_back(&AS_MULT_ASSIGN); + operators.push_back(&AS_DIV_ASSIGN); + operators.push_back(&AS_MOD_ASSIGN); + operators.push_back(&AS_OR_ASSIGN); + operators.push_back(&AS_AND_ASSIGN); + operators.push_back(&AS_XOR_ASSIGN); + operators.push_back(&AS_EQUAL); + operators.push_back(&AS_PLUS_PLUS); + operators.push_back(&AS_MINUS_MINUS); + operators.push_back(&AS_NOT_EQUAL); + operators.push_back(&AS_GR_EQUAL); + operators.push_back(&AS_GR_GR_GR_ASSIGN); + operators.push_back(&AS_GR_GR_ASSIGN); + operators.push_back(&AS_GR_GR_GR); + operators.push_back(&AS_GR_GR); + operators.push_back(&AS_LS_EQUAL); + operators.push_back(&AS_LS_LS_LS_ASSIGN); + operators.push_back(&AS_LS_LS_ASSIGN); + operators.push_back(&AS_LS_LS_LS); + operators.push_back(&AS_LS_LS); + operators.push_back(&AS_ARROW); + operators.push_back(&AS_AND); + operators.push_back(&AS_OR); + operators.push_back(&AS_COLON_COLON); + + //// BUGFIX: removed the folowing lines + //// operators.push_back(&AS_PAREN_PAREN); + //// operators.push_back(&AS_BLPAREN_BLPAREN); + + operators.push_back(&AS_PLUS); + operators.push_back(&AS_MINUS); + operators.push_back(&AS_MULT); + operators.push_back(&AS_DIV); + operators.push_back(&AS_MOD); + operators.push_back(&AS_QUESTION); + operators.push_back(&AS_COLON); + operators.push_back(&AS_ASSIGN); + operators.push_back(&AS_LS); + operators.push_back(&AS_GR); + operators.push_back(&AS_NOT); + operators.push_back(&AS_BIT_OR); + operators.push_back(&AS_BIT_AND); + operators.push_back(&AS_BIT_NOT); + 
operators.push_back(&AS_BIT_XOR); + operators.push_back(&AS_OPERATOR); + operators.push_back(&AS_COMMA); + //BEGIN Content Patch patch1_ssvb_patch.tar.gz + operators.push_back(&AS_SEMICOLON); + //END Content Patch patch1_ssvb_patch.tar.gz + operators.push_back(&AS_RETURN); + + assignmentOperators.push_back(&AS_PLUS_ASSIGN); + assignmentOperators.push_back(&AS_MINUS_ASSIGN); + assignmentOperators.push_back(&AS_MULT_ASSIGN); + assignmentOperators.push_back(&AS_DIV_ASSIGN); + assignmentOperators.push_back(&AS_MOD_ASSIGN); + assignmentOperators.push_back(&AS_XOR_ASSIGN); + assignmentOperators.push_back(&AS_OR_ASSIGN); + assignmentOperators.push_back(&AS_AND_ASSIGN); + assignmentOperators.push_back(&AS_GR_GR_GR_ASSIGN); + assignmentOperators.push_back(&AS_LS_LS_LS_ASSIGN); + assignmentOperators.push_back(&AS_ASSIGN); + } + + /** + * initialize the ASFormatter. + * + * init() should be called every time a ASFormatter object is to start + * formatting a NEW source file. + * init() recieves a pointer to a DYNAMICALLY CREATED ASSourceIterator object + * that will be used to iterate through the source code. This object will be + * deleted during the ASFormatter's destruction, and thus should not be + * deleted elsewhere. + * + * @param iter a pointer to the DYNAMICALLY CREATED ASSourceIterator object. 
+ */ + void ASFormatter::init(ASSourceIterator *si) + { + ASBeautifier::init(si); + sourceIterator = si; + + INIT_CONTAINER( preBracketHeaderStack, new vector<const string*> ); + INIT_CONTAINER( bracketTypeStack, new vector<BracketType> ); + bracketTypeStack->push_back(DEFINITION_TYPE); + INIT_CONTAINER( parenStack, new vector<int> ); + parenStack->push_back(0); + + currentHeader = NULL; + currentLine = string(""); + formattedLine = ""; + currentChar = ' '; + previousCommandChar = ' '; + previousNonWSChar = ' '; + quoteChar = '"'; + charNum = 0; + previousOperator = NULL; + + isVirgin = true; + isInLineComment = false; + isInComment = false; + isInPreprocessor = false; + doesLineStartComment = false; + isInQuote = false; + isSpecialChar = false; + isNonParenHeader = true; + foundPreDefinitionHeader = false; + foundPreCommandHeader = false; + foundQuestionMark = false; + isInLineBreak = false; + endOfCodeReached = false; + isLineReady = false; + isPreviousBracketBlockRelated = true; + isInPotentialCalculation = false; + //foundOneLineBlock = false; + shouldReparseCurrentChar = false; + passedSemicolon = false; + passedColon = false; + isInTemplate = false; + shouldBreakLineAfterComments = false; + isImmediatelyPostComment = false; + isImmediatelyPostLineComment = false; + isImmediatelyPostEmptyBlock = false; + + isPrependPostBlockEmptyLineRequested = false; + isAppendPostBlockEmptyLineRequested = false; + prependEmptyLine = false; + + foundClosingHeader = false; + previousReadyFormattedLineLength = 0; + + isImmediatelyPostHeader = false; + isInHeader = false; + } + + /** + * get the next formatted line. + * + * @return formatted line. 
+ */ + + string ASFormatter::nextLine() + { + const string *newHeader; + bool isCharImmediatelyPostComment = false; + bool isPreviousCharPostComment = false; + bool isCharImmediatelyPostLineComment = false; + bool isInVirginLine = isVirgin; + bool isCharImmediatelyPostOpenBlock = false; + bool isCharImmediatelyPostCloseBlock = false; + bool isCharImmediatelyPostTemplate = false; + bool isCharImmediatelyPostHeader = false; + + if (!isFormattingEnabled()) + return ASBeautifier::nextLine(); + + while (!isLineReady) + { + if (shouldReparseCurrentChar) + shouldReparseCurrentChar = false; + else if (!getNextChar()) + { + breakLine(); + return beautify(readyFormattedLine); + } + else // stuff to do when reading a new character... + { + // make sure that a virgin '{' at the begining ofthe file will be treated as a block... + if (isInVirginLine && currentChar == '{') + previousCommandChar = '{'; + isPreviousCharPostComment = isCharImmediatelyPostComment; + isCharImmediatelyPostComment = false; + isCharImmediatelyPostTemplate = false; + isCharImmediatelyPostHeader = false; + } + + if (isInLineComment) + { + appendCurrentChar(); + + // explicitely break a line when a line comment's end is found. + if (/*bracketFormatMode == ATTACH_MODE &&*/ charNum+1 == currentLine.length()) + { + isInLineBreak = true; + isInLineComment = false; + isImmediatelyPostLineComment = true; + currentChar = 0; //make sure it is a neutral char. 
+ } + continue; + } + else if (isInComment) + { + if (isSequenceReached(AS_CLOSE_COMMENT)) + { + isInComment = false; + isImmediatelyPostComment = true; + appendSequence(AS_CLOSE_COMMENT); + goForward(1); + } + else + appendCurrentChar(); + + continue; + } + + // not in line comment or comment + + else if (isInQuote) + { + if (isSpecialChar) + { + isSpecialChar = false; + appendCurrentChar(); + } + else if (currentChar == '\\') + { + isSpecialChar = true; + appendCurrentChar(); + } + else if (quoteChar == currentChar) + { + isInQuote = false; + appendCurrentChar(); + } + else + { + appendCurrentChar(); + } + + continue; + } + + + + // handle white space - needed to simplify the rest. + if (isWhiteSpace(currentChar) || isInPreprocessor) + { + ////// DEVEL: if (isLegalNameChar(previousChar) && isLegalNameChar(peekNextChar())) + appendCurrentChar(); + continue; + } + + /* not in MIDDLE of quote or comment or white-space of any type ... */ + + if (isSequenceReached(AS_OPEN_LINE_COMMENT)) + { + isInLineComment = true; + if (shouldPadOperators) + appendSpacePad(); + appendSequence(AS_OPEN_LINE_COMMENT); + goForward(1); + continue; + } + else if (isSequenceReached(AS_OPEN_COMMENT)) + { + isInComment = true; + if (shouldPadOperators) + appendSpacePad(); + appendSequence(AS_OPEN_COMMENT); + goForward(1); + continue; + } + else if (currentChar == '"' || currentChar == '\'') + { + isInQuote = true; + quoteChar = currentChar; + //// if (shouldPadOperators) // BUGFIX: these two lines removed. seem to be unneeded, and interfere with L" + //// appendSpacePad(); // BUFFIX: TODO make sure the removal of these lines doesn't reopen old bugs... + appendCurrentChar(); + continue; + } + + /* not in quote or comment or white-space of any type ... 
*/ + + + // check if in preprocessor + // ** isInPreprocessor will be automatically reset at the begining + // of a new line in getnextChar() + if (currentChar == '#') + isInPreprocessor = true; + + if (isInPreprocessor) + { + appendCurrentChar(); + continue; + } + + /* not in preprocessor ... */ + + if (isImmediatelyPostComment) + { + isImmediatelyPostComment = false; + isCharImmediatelyPostComment = true; + } + + if (isImmediatelyPostLineComment) + { + isImmediatelyPostLineComment = false; + isCharImmediatelyPostLineComment = true; + } + + if (shouldBreakLineAfterComments) + { + shouldBreakLineAfterComments = false; + shouldReparseCurrentChar = true; + breakLine(); + continue; + } + + // reset isImmediatelyPostHeader information + if (isImmediatelyPostHeader) + { + isImmediatelyPostHeader = false; + isCharImmediatelyPostHeader = true; + + // Make sure headers are broken from their succeeding blocks + // (e.g. + // if (isFoo) DoBar(); + // should become + // if (isFoo) + // DoBar; + // ) + // But treat else if() as a special case which should not be broken! + if (shouldBreakOneLineStatements) + { + // if may break 'else if()'s, ythen simply break the line + + if (shouldBreakElseIfs) + isInLineBreak = true; + + else + { + // make sure 'else if()'s are not broken. + + bool isInElseIf = false; + const string *upcomingHeader; + + upcomingHeader = findHeader(headers); + if (currentHeader == &AS_ELSE && upcomingHeader == &AS_IF) + isInElseIf = true; + + if (!isInElseIf) + isInLineBreak = true; ////BUGFIX: SHOULD NOT BE breakLine() !!! + } + } + } + + if (passedSemicolon) + { + passedSemicolon = false; + if (parenStack->back() == 0) + { + shouldReparseCurrentChar = true; + isInLineBreak = true; + continue; + } + } + + if (passedColon) + { + passedColon = false; + if (parenStack->back() == 0) + { + shouldReparseCurrentChar = true; + isInLineBreak = true; + continue; + } + } + + // Check if in template declaration, e.g. 
foo<bar> or foo<bar,fig> + // If so, set isInTemplate to true + // + if (!isInTemplate && currentChar == '<') + { + int templateDepth = 0; + const string *oper; + for (unsigned int i=charNum; + i< currentLine.length(); + i += (oper ? oper->length() : 1) ) + { + oper = ASBeautifier::findHeader(currentLine, i, operators); + + if (oper == &AS_LS) + { + templateDepth++; + } + else if (oper == &AS_GR) + { + templateDepth--; + if (templateDepth == 0) + { + // this is a template! + // + isInTemplate = true; + break; + } + } + else if (oper == &AS_COMMA // comma, e.g. A<int, char> + || oper == &AS_BIT_AND // reference, e.g. A<int&> + || oper == &AS_MULT // pointer, e.g. A<int*> + || oper == &AS_COLON_COLON) // ::, e.g. std::string + { + continue; + } + else if (!isLegalNameChar(currentLine[i]) && !isWhiteSpace(currentLine[i])) + { + // this is not a template -> leave... + // + isInTemplate = false; + break; + } + } + } + + + // handle parenthesies + // + if (currentChar == '(' || currentChar == '[' || (isInTemplate && currentChar == '<')) + { + parenStack->back()++; + } + else if (currentChar == ')' || currentChar == ']' || (isInTemplate && currentChar == '>')) + { + parenStack->back()--; + if (isInTemplate && parenStack->back() == 0) + { + isInTemplate = false; + isCharImmediatelyPostTemplate = true; + } + + // check if this parenthesis closes a header, e.g. if (...), while (...) 
+ // + if (isInHeader && parenStack->back() == 0) + { + isInHeader = false; + isImmediatelyPostHeader = true; + } + + } + + // handle brackets + // + BracketType bracketType = NULL_TYPE; + + if (currentChar == '{') + { + bracketType = getBracketType(); + foundPreDefinitionHeader = false; + foundPreCommandHeader = false; + + bracketTypeStack->push_back(bracketType); + preBracketHeaderStack->push_back(currentHeader); + currentHeader = NULL; + + isPreviousBracketBlockRelated = !IS_A(bracketType, ARRAY_TYPE); + } + else if (currentChar == '}') + { + // if a request has been made to append a post block empty line, + // but the block exists immediately before a closing bracket, + // then there is not need for the post block empty line. + // + isAppendPostBlockEmptyLineRequested = false; + + if (!bracketTypeStack->empty()) + { + bracketType = bracketTypeStack->back(); + bracketTypeStack->pop_back(); + + isPreviousBracketBlockRelated = !IS_A(bracketType, ARRAY_TYPE); + } + + if (!preBracketHeaderStack->empty()) + { + currentHeader = preBracketHeaderStack->back(); + preBracketHeaderStack->pop_back(); + } + else + currentHeader = NULL; + } + + if (!IS_A(bracketType, ARRAY_TYPE)) + { + + if (currentChar == '{') + { + parenStack->push_back(0); + } + else if (currentChar == '}') + { + if (!parenStack->empty()) + { + parenStack->pop_back(); + } + } + + if (bracketFormatMode != NONE_MODE) + { + if (currentChar == '{') + { + if ( ( bracketFormatMode == ATTACH_MODE + || bracketFormatMode == BDAC_MODE && bracketTypeStack->size()>=2 + && IS_A((*bracketTypeStack)[bracketTypeStack->size()-2], COMMAND_TYPE) /*&& isInLineBreak*/) + && !isCharImmediatelyPostLineComment ) + { + appendSpacePad(); + if (!isCharImmediatelyPostComment // do not attach '{' to lines that end with /**/ comments. 
+ && previousCommandChar != '{' + && previousCommandChar != '}' + && previousCommandChar != ';') // '}' , ';' chars added for proper handling of '{' immediately after a '}' or ';' + appendCurrentChar(false); + else + appendCurrentChar(true); + continue; + } + else if (bracketFormatMode == BREAK_MODE + || bracketFormatMode == BDAC_MODE && bracketTypeStack->size()>=2 + && IS_A((*bracketTypeStack)[bracketTypeStack->size()-2], DEFINITION_TYPE)) + { + if ( shouldBreakOneLineBlocks || !IS_A(bracketType, SINGLE_LINE_TYPE) ) + breakLine(); + appendCurrentChar(); + continue; + } + } + else if (currentChar == '}') + { + // bool origLineBreak = isInLineBreak; + + // mark state of immediately after empty block + // this state will be used for locating brackets that appear immedately AFTER an empty block (e.g. '{} \n}'). + if (previousCommandChar == '{') + isImmediatelyPostEmptyBlock = true; + + if ( (!(previousCommandChar == '{' && isPreviousBracketBlockRelated) ) // this '{' does not close an empty block + && (shouldBreakOneLineBlocks || !IS_A(bracketType, SINGLE_LINE_TYPE)) // astyle is allowed to break on line blocks + && !isImmediatelyPostEmptyBlock) // this '}' does not immediately follow an empty block + { + breakLine(); + appendCurrentChar(); + } + else + { + // Content Patch ASFormatter.cpp.patch.bz2 + // if (!isCharImmediatelyPostComment) + if (!isCharImmediatelyPostComment && + !isCharImmediatelyPostLineComment) + isInLineBreak = false; + appendCurrentChar(); + if (shouldBreakOneLineBlocks || !IS_A(bracketType, SINGLE_LINE_TYPE)) + shouldBreakLineAfterComments = true; + } + + if (shouldBreakBlocks) + { + isAppendPostBlockEmptyLineRequested =true; + } + + continue; + } + } + } + + if ( ( (previousCommandChar == '{' + && isPreviousBracketBlockRelated) + + || (previousCommandChar == '}' + && !isImmediatelyPostEmptyBlock // <-- + && isPreviousBracketBlockRelated + && !isPreviousCharPostComment // <-- Fixes wrongly appended newlines after '}' immediately after comments... 
10/9/1999 + && peekNextChar() != ' ')) + + && (shouldBreakOneLineBlocks || !IS_A(bracketTypeStack->back(), SINGLE_LINE_TYPE)) ) + { + isCharImmediatelyPostOpenBlock = (previousCommandChar == '{'); + isCharImmediatelyPostCloseBlock = (previousCommandChar == '}'); + + previousCommandChar = ' '; + isInLineBreak = true; //<---- + } + + // reset block handling flags + isImmediatelyPostEmptyBlock = false; + + // look for headers + if (!isInTemplate) + { + if ( (newHeader = findHeader(headers)) != NULL) + { + foundClosingHeader = false; + const string *previousHeader; + + // recognize closing headers of do..while, if..else, try..catch..finally + if ( (newHeader == &AS_ELSE && currentHeader == &AS_IF) + || (newHeader == &AS_WHILE && currentHeader == &AS_DO) + || (newHeader == &AS_CATCH && currentHeader == &AS_TRY) + || (newHeader == &AS_CATCH && currentHeader == &AS_CATCH) + || (newHeader == &AS_FINALLY && currentHeader == &AS_TRY) + || (newHeader == &AS_FINALLY && currentHeader == &AS_CATCH) ) + foundClosingHeader = true; + + previousHeader = currentHeader; + currentHeader = newHeader; + + // If in ATTACH or LINUX bracket modes, attach closing headers (e.g. 'else', 'catch') + // to their preceding bracket, + // But do not perform the attachment if the shouldBreakClosingHeaderBrackets is set! + if (!shouldBreakClosingHeaderBrackets && foundClosingHeader && (bracketFormatMode == ATTACH_MODE || bracketFormatMode == BDAC_MODE) && previousNonWSChar == '}') + { + isInLineBreak = false; + appendSpacePad(); + + if (shouldBreakBlocks) + isAppendPostBlockEmptyLineRequested = false; + } + + //Check if a template definition as been reached, e.g. 
template<class A> + if (newHeader == &AS_TEMPLATE) + { + isInTemplate = true; + } + + // check if the found header is non-paren header + isNonParenHeader = ( find(nonParenHeaders.begin(), nonParenHeaders.end(), + newHeader) != nonParenHeaders.end() ); + appendSequence(*currentHeader); + goForward(currentHeader->length() - 1); + // if padding is on, and a paren-header is found + // then add a space pad after it. + if (shouldPadOperators && !isNonParenHeader) + appendSpacePad(); + + + // Signal that a header has been reached + // *** But treat a closing while() (as in do...while) + // as if it where NOT a header since a closing while() + // should never have a block after it! + if (!(foundClosingHeader && currentHeader == &AS_WHILE)) + { + isInHeader = true; + if (isNonParenHeader) + { + isImmediatelyPostHeader = true; + isInHeader = false; + } + } + + if (currentHeader == &AS_IF && previousHeader == &AS_ELSE) + isInLineBreak = false; + + if (shouldBreakBlocks) + { + if (previousHeader == NULL + && !foundClosingHeader + && !isCharImmediatelyPostOpenBlock) + { + isPrependPostBlockEmptyLineRequested = true; + } + + if (currentHeader == &AS_ELSE + || currentHeader == &AS_CATCH + || currentHeader == &AS_FINALLY + || foundClosingHeader) + { + isPrependPostBlockEmptyLineRequested = false; + } + + if (shouldBreakClosingHeaderBlocks + && isCharImmediatelyPostCloseBlock) + { + isPrependPostBlockEmptyLineRequested = true; + } + + } + + continue; + } + else if ( (newHeader = findHeader(preDefinitionHeaders)) != NULL) + { + foundPreDefinitionHeader = true; + appendSequence(*newHeader); + goForward(newHeader->length() - 1); + + if (shouldBreakBlocks) + isPrependPostBlockEmptyLineRequested = true; + + continue; + } + else if ( (newHeader = findHeader(preCommandHeaders)) != NULL) + { + foundPreCommandHeader = true; + appendSequence(*newHeader); + goForward(newHeader->length() - 1); + + continue; + } + } + + if (previousNonWSChar == '}' || currentChar == ';') + { + if 
(shouldBreakOneLineStatements && currentChar == ';' + && (shouldBreakOneLineBlocks || !IS_A(bracketTypeStack->back(), SINGLE_LINE_TYPE))) + { + passedSemicolon = true; + } + + if (shouldBreakBlocks && currentHeader != NULL && parenStack->back() == 0) + { + isAppendPostBlockEmptyLineRequested = true; + } + + if (currentChar != ';') + currentHeader = NULL; //DEVEL: is this ok? + + foundQuestionMark = false; + foundPreDefinitionHeader = false; + foundPreCommandHeader = false; + isInPotentialCalculation = false; + + } + + if (currentChar == ':' + && shouldBreakOneLineStatements + && !foundQuestionMark // not in a ... ? ... : ... sequence + && !foundPreDefinitionHeader // not in a definition block (e.g. class foo : public bar + && previousCommandChar != ')' // not immediately after closing paren of a method header, e.g. ASFormatter::ASFormatter(...) : ASBeautifier(...) + && previousChar != ':' // not part of '::' + && peekNextChar() != ':') // not part of '::' + { + passedColon = true; + if (shouldBreakBlocks) + isPrependPostBlockEmptyLineRequested = true; + } + + if (currentChar == '?') + foundQuestionMark = true; + + if (shouldPadOperators) + { + if ((newHeader = findHeader(operators)) != NULL) + { + bool shouldPad = (newHeader != &AS_COLON_COLON + && newHeader != &AS_PAREN_PAREN + && newHeader != &AS_BLPAREN_BLPAREN + && newHeader != &AS_PLUS_PLUS + && newHeader != &AS_MINUS_MINUS + && newHeader != &AS_NOT + && newHeader != &AS_BIT_NOT + && newHeader != &AS_ARROW + && newHeader != &AS_OPERATOR + && !(newHeader == &AS_MINUS && isInExponent()) + && !(newHeader == &AS_PLUS && isInExponent()) + && previousOperator != &AS_OPERATOR + && !((newHeader == &AS_MULT || newHeader == &AS_BIT_AND) + && isPointerOrReference()) + && !( (isInTemplate || isCharImmediatelyPostTemplate) + && (newHeader == &AS_LS || newHeader == &AS_GR)) + ); + + if (!isInPotentialCalculation) + if (find(assignmentOperators.begin(), assignmentOperators.end(), newHeader) + != assignmentOperators.end()) + 
isInPotentialCalculation = true; + + // pad before operator + if (shouldPad + && !(newHeader == &AS_COLON && !foundQuestionMark) + && newHeader != &AS_SEMICOLON + && newHeader != &AS_COMMA) + appendSpacePad(); + appendSequence(*newHeader); + goForward(newHeader->length() - 1); + + // since this block handles '()' and '[]', + // the parenStack must be updated here accordingly! + if (newHeader == &AS_PAREN_PAREN + || newHeader == &AS_BLPAREN_BLPAREN) + parenStack->back()--; + + currentChar = (*newHeader)[newHeader->length() - 1]; + // pad after operator + // but do not pad after a '-' that is a urinary-minus. + if ( shouldPad && !(newHeader == &AS_MINUS && isUrinaryMinus()) ) + appendSpacePad(); + + previousOperator = newHeader; + continue; + } + } + //BEGIN Content Patch patch1_ssvb_patch.tar.gz + if (currentChar == '(' || currentChar == '[' ) + isInPotentialCalculation = true; + //END Content Patch patch1_ssvb_patch.tar.gz + if (shouldPadParenthesies) + { + if (currentChar == '(' || currentChar == '[' ) + { + char peekedChar = peekNextChar(); + + isInPotentialCalculation = true; + appendCurrentChar(); + if (!(currentChar == '(' && peekedChar == ')') + && !(currentChar == '[' && peekedChar == ']')) + appendSpacePad(); + continue; + } + else if (currentChar == ')' || currentChar == ']') + { + char peekedChar = peekNextChar(); + + if (!(previousChar == '(' && currentChar == ')') + && !(previousChar == '[' && currentChar == ']')) + appendSpacePad(); + + appendCurrentChar(); + + if (peekedChar != ';' && peekedChar != ',' && peekedChar != '.' + && !(currentChar == ']' && peekedChar == '[')) + appendSpacePad(); + continue; + } + } + + appendCurrentChar(); + } + + // return a beautified (i.e. correctly indented) line. 
+ + string beautifiedLine; + int readyFormattedLineLength = trim(readyFormattedLine).length(); + + if (prependEmptyLine + && readyFormattedLineLength > 0 + && previousReadyFormattedLineLength > 0) + { + isLineReady = true; // signal that a readyFormattedLine is still waiting + beautifiedLine = beautify(""); + } + else + { + isLineReady = false; + beautifiedLine = beautify(readyFormattedLine); + } + + prependEmptyLine = false; + previousReadyFormattedLineLength = readyFormattedLineLength; + + return beautifiedLine; + + } + + + /** + * check if there are any indented lines ready to be read by nextLine() + * + * @return are there any indented lines ready? + */ + bool ASFormatter::hasMoreLines() const + { + if (!isFormattingEnabled()) + return ASBeautifier::hasMoreLines(); + else + return !endOfCodeReached; + } + + /** + * check if formatting options are enabled, in addition to indentation. + * + * @return are formatting options enabled? + */ + bool ASFormatter::isFormattingEnabled() const + { + return (bracketFormatMode != NONE_MODE + || shouldPadOperators + || shouldConvertTabs); + } + + /** + * set the bracket formatting mode. + * options: + * astyle::NONE_MODE no formatting of brackets. + * astyle::ATTACH_MODE Java, K&R style bracket placement. + * astyle::BREAK_MODE ANSI C/C++ style bracket placement. + * + * @param mode the bracket formatting mode. + */ + void ASFormatter::setBracketFormatMode(BracketMode mode) + { + bracketFormatMode = mode; + } + + /** + * set closing header bracket breaking mode + * options: + * true brackets just before closing headers (e.g. 'else', 'catch') + * will be broken, even if standard brackets are attached. + * false closing header brackets will be treated as standard brackets. + * + * @param mode the closing header bracket breaking mode. 
+ */ + void ASFormatter::setBreakClosingHeaderBracketsMode(bool state) + { + shouldBreakClosingHeaderBrackets = state; + } + + /** + * set 'else if()' breaking mode + * options: + * true 'else' headers will be broken from their succeeding 'if' headers. + * false 'else' headers will be attached to their succeeding 'if' headers. + * + * @param mode the 'else if()' breaking mode. + */ + void ASFormatter::setBreakElseIfsMode(bool state) + { + shouldBreakElseIfs = state; + } + + /** + * set operator padding mode. + * options: + * true statement operators will be padded with spaces around them. + * false statement operators will not be padded. + * + * @param mode the padding mode. + */ + void ASFormatter::setOperatorPaddingMode(bool state) + { + shouldPadOperators = state; + } + + /** + * set parentheies padding mode. + * options: + * true statement parenthesies will be padded with spaces around them. + * false statement parenthesies will not be padded. + * + * @param mode the padding mode. + */ + void ASFormatter::setParenthesisPaddingMode(bool state) + { + shouldPadParenthesies = state; + } + + /** + * set option to break/not break one-line blocks + * + * @param state true = break, false = don't break. + */ + void ASFormatter::setBreakOneLineBlocksMode(bool state) + { + shouldBreakOneLineBlocks = state; + } + + /** + * set option to break/not break lines consisting of multiple statements. + * + * @param state true = break, false = don't break. + */ + void ASFormatter::setSingleStatementsMode(bool state) + { + shouldBreakOneLineStatements = state; + } + + /** + * set option to convert tabs to spaces. + * + * @param state true = convert, false = don't convert. + */ + void ASFormatter::setTabSpaceConversionMode(bool state) + { + shouldConvertTabs = state; + } + + + /** + * set option to break unrelated blocks of code with empty lines. + * + * @param state true = convert, false = don't convert. 
+ */ + void ASFormatter::setBreakBlocksMode(bool state) + { + shouldBreakBlocks = state; + } + + /** + * set option to break closing header blocks of code (such as 'else', 'catch', ...) with empty lines. + * + * @param state true = convert, false = don't convert. + */ + void ASFormatter::setBreakClosingHeaderBlocksMode(bool state) + { + shouldBreakClosingHeaderBlocks = state; + } + + /** + * check if a specific sequence exists in the current placement of the current line + * + * @return whether sequence has been reached. + * @param sequence the sequence to be checked + */ + bool ASFormatter::isSequenceReached(const string &sequence) const + { + return currentLine.COMPARE(charNum, sequence.length(), sequence) == 0; + + } + + /** + * jump over several characters. + * + * @param i the number of characters to jump over. + */ + void ASFormatter::goForward(int i) + { + while (--i >= 0) + getNextChar(); + } + + /** + * peek at the next unread character. + * + * @return the next unread character. + */ + char ASFormatter::peekNextChar() const + { + int peekNum = charNum + 1; + int len = currentLine.length(); + char ch = ' '; + + while (peekNum < len) + { + ch = currentLine[peekNum++]; + if (!isWhiteSpace(ch)) + return ch; + } + + if (shouldConvertTabs && ch == '\t') + ch = ' '; + + return ch; + } + + /** + * check if current placement is before a comment or line-comment + * + * @return is before a comment or line-comment. + */ + bool ASFormatter::isBeforeComment() const + { + int peekNum = charNum + 1; + int len = currentLine.length(); + // char ch = ' '; + bool foundComment = false; + + for (peekNum = charNum + 1; + peekNum < len && isWhiteSpace(currentLine[peekNum]); + ++peekNum) + ; + + if (peekNum < len) + foundComment = ( currentLine.COMPARE(peekNum, 2, AS_OPEN_COMMENT) == 0 + || currentLine.COMPARE(peekNum, 2, AS_OPEN_LINE_COMMENT) == 0 ); + + return foundComment; + } + + /** + * get the next character, increasing the current placement in the process. 
+ * the new character is inserted into the variable currentChar. + * + * @return whether succeded to recieve the new character. + */ + bool ASFormatter::getNextChar() + { + isInLineBreak = false; + bool isAfterFormattedWhiteSpace = false; + + if (shouldPadOperators && !isInComment && !isInLineComment + && !isInQuote && !doesLineStartComment && !isInPreprocessor + && !isBeforeComment()) + { + //BEGIN Content Patch patch1_ssvb_patch.tar.gz + char prevchar = ' '; + char nextchar = peekNextChar(); + + int len = formattedLine.length(); + // if (len > 0 && isWhiteSpace(formattedLine[len-1])) + if (len > 0) prevchar = formattedLine[len-1]; + if (isWhiteSpace(prevchar) || prevchar == '(' || prevchar == '[' || + nextchar == ')' || nextchar == ']') + { + isAfterFormattedWhiteSpace = true; + } + //END Content Patch patch1_ssvb_patch.tar.gz + } + + previousChar = currentChar; + if (!isWhiteSpace(currentChar)) + { + previousNonWSChar = currentChar; + if (!isInComment && !isInLineComment && !isInQuote + && !isSequenceReached(AS_OPEN_COMMENT) + && !isSequenceReached(AS_OPEN_LINE_COMMENT) ) + previousCommandChar = previousNonWSChar; + } + + unsigned int currentLineLength = currentLine.length(); + + if (charNum+1 < currentLineLength + && (!isWhiteSpace(peekNextChar()) || isInComment || isInLineComment)) + { + currentChar = currentLine[++charNum]; + if (isAfterFormattedWhiteSpace) + while (isWhiteSpace(currentChar) && charNum+1 < currentLineLength) + currentChar = currentLine[++charNum]; + + if (shouldConvertTabs && currentChar == '\t') + currentChar = ' '; + + return true; + } + // BEGIN Content patch ASFormatter450670.patch.bz2 + else if (isInLineComment && (charNum+1 == currentLineLength)) + { + // fix BUG #450670 + currentChar = ' '; + return true; + } + // END Content patch ASFormatter450670.patch.bz2 + else + { + if (sourceIterator->hasMoreLines()) + { + currentLine = sourceIterator->nextLine(); + if (currentLine.length() == 0) + { + /*think*/ currentLine = string(" "); + } + 
+ // unless reading in the first line of the file, + // break a new line. + if (!isVirgin) + isInLineBreak = true; + else + isVirgin = false; + + if (isInLineComment) + isImmediatelyPostLineComment = true; + isInLineComment = false; + + trimNewLine(); + currentChar = currentLine[charNum]; + + // check if is in preprocessor right after the line break and line trimming + if (previousNonWSChar != '\\') + isInPreprocessor = false; + + if (shouldConvertTabs && currentChar == '\t') + currentChar = ' '; + + return true; + } + else + { + endOfCodeReached = true; + return false; + } + } + } + + /** + * jump over the leading white space in the current line, + * IF the line does not begin a comment or is in a preprocessor definition. + */ + void ASFormatter::trimNewLine() + { + unsigned int len = currentLine.length(); + charNum = 0; + + if (isInComment || isInPreprocessor) + return; + + while (isWhiteSpace(currentLine[charNum]) && charNum+1 < len) + ++charNum; + + doesLineStartComment = false; + if (isSequenceReached(string("/*"))) + { + charNum = 0; + doesLineStartComment = true; + } + } + + /** + * append a character to the current formatted line. + * Unless disabled (via canBreakLine == false), first check if a + * line-break has been registered, and if so break the + * formatted line, and only then append the character into + * the next formatted line. + * + * @param ch the character to append. + * @param canBreakLine if true, a registered line-break + */ + void ASFormatter::appendChar(char ch, bool canBreakLine) + { + if (canBreakLine && isInLineBreak) + breakLine(); + formattedLine.append(1, ch); + } + + /** + * append the CURRENT character (curentChar)to the current + * formatted line. Unless disabled (via canBreakLine == false), + * first check if a line-break has been registered, and if so + * break the formatted line, and only then append the character + * into the next formatted line. 
+ * + * @param canBreakLine if true, a registered line-break + */ + void ASFormatter::appendCurrentChar(bool canBreakLine) + { + appendChar(currentChar, canBreakLine); + } + + /** + * append a string sequence to the current formatted line. + * Unless disabled (via canBreakLine == false), first check if a + * line-break has been registered, and if so break the + * formatted line, and only then append the sequence into + * the next formatted line. + * + * @param sequence the sequence to append. + * @param canBreakLine if true, a registered line-break + */ + void ASFormatter::appendSequence(const string &sequence, bool canBreakLine) + { + if (canBreakLine && isInLineBreak) + breakLine(); + formattedLine.append(sequence); + } + + /** + * append a space to the current formattedline, UNLESS the + * last character is already a white-space character. + */ + void ASFormatter::appendSpacePad() + { + int len = formattedLine.length(); + if (len == 0 || !isWhiteSpace(formattedLine[len-1])) + formattedLine.append(1, ' '); + } + + /** + * register a line break for the formatted line. + */ + void ASFormatter::breakLine() + { + isLineReady = true; + isInLineBreak = false; + + // queue an empty line prepend request if one exists + prependEmptyLine = isPrependPostBlockEmptyLineRequested; + + readyFormattedLine = formattedLine; + if (isAppendPostBlockEmptyLineRequested) + { + isAppendPostBlockEmptyLineRequested = false; + isPrependPostBlockEmptyLineRequested = true; + } + else + { + isPrependPostBlockEmptyLineRequested = false; + } + + formattedLine = ""; + } + + /** + * check if the currently reached open-bracket (i.e. '{') + * opens a: + * - a definition type block (such as a class or namespace), + * - a command block (such as a method block) + * - a static array + * this method takes for granted that the current character + * is an opening bracket. + * + * @return the type of the opened block. 
+ */ + BracketType ASFormatter::getBracketType() const + { + BracketType returnVal; + + if (foundPreDefinitionHeader) + returnVal = DEFINITION_TYPE; + else + { + bool isCommandType; + isCommandType = ( foundPreCommandHeader + || ( currentHeader != NULL && isNonParenHeader ) + || ( previousCommandChar == ')' ) + || ( previousCommandChar == ':' && !foundQuestionMark ) + || ( previousCommandChar == ';' ) + || ( ( previousCommandChar == '{' || previousCommandChar == '}') + && isPreviousBracketBlockRelated ) ); + + returnVal = (isCommandType ? COMMAND_TYPE : ARRAY_TYPE); + } + + if (isOneLineBlockReached()) + returnVal = (BracketType) (returnVal | SINGLE_LINE_TYPE); + + return returnVal; + } + + /** + * check if the currently reached '*' or '&' character is + * a pointer-or-reference symbol, or another operator. + * this method takes for granted that the current character + * is either a '*' or '&'. + * + * @return whether current character is a reference-or-pointer + */ + bool ASFormatter::isPointerOrReference() const + { + bool isPR; + isPR = ( !isInPotentialCalculation + || IS_A(bracketTypeStack->back(), DEFINITION_TYPE) + || (!isLegalNameChar(previousNonWSChar) + && previousNonWSChar != ')' + && previousNonWSChar != ']') + ); + + if (!isPR) + { + char nextChar = peekNextChar(); + isPR |= (!isWhiteSpace(nextChar) + && nextChar != '-' + && nextChar != '(' + && nextChar != '[' + && !isLegalNameChar(nextChar)); + } + + return isPR; + } + + + /** + * check if the currently reached '-' character is + * a urinary minus + * this method takes for granted that the current character + * is a '-'. + * + * @return whether the current '-' is a urinary minus. + */ + bool ASFormatter::isUrinaryMinus() const + { + return ( (previousOperator == &AS_RETURN || !isalnum(previousCommandChar)) + && previousCommandChar != '.' 
+ && previousCommandChar != ')' + && previousCommandChar != ']' ); + } + + + /** + * check if the currently reached '-' or '+' character is + * part of an exponent, i.e. 0.2E-5. + * this method takes for granted that the current character + * is a '-' or '+'. + * + * @return whether the current '-' is in an exponent. + */ + bool ASFormatter::isInExponent() const + { + int formattedLineLength = formattedLine.length(); + if (formattedLineLength >= 2) + { + char prevPrevFormattedChar = formattedLine[formattedLineLength - 2]; + char prevFormattedChar = formattedLine[formattedLineLength - 1]; + + return ( (prevFormattedChar == 'e' || prevFormattedChar == 'E') + && (prevPrevFormattedChar == '.' || isdigit(prevPrevFormattedChar)) ); + } + else + return false; + } + + /** + * check if a one-line bracket has been reached, + * i.e. if the currently reached '{' character is closed + * with a complimentry '}' elsewhere on the current line, + *. + * @return has a one-line bracket been reached? + */ + bool ASFormatter::isOneLineBlockReached() const + { + bool isInComment = false; + bool isInQuote = false; + int bracketCount = 1; + int currentLineLength = currentLine.length(); + int i = 0; + char ch = ' '; + char quoteChar = ' '; + + for (i = charNum + 1; i < currentLineLength; ++i) + { + ch = currentLine[i]; + + if (isInComment) + { + if (currentLine.COMPARE(i, 2, "*/") == 0) + { + isInComment = false; + ++i; + } + continue; + } + + if (ch == '\\') + { + ++i; + continue; + } + + if (isInQuote) + { + if (ch == quoteChar) + isInQuote = false; + continue; + } + + if (ch == '"' || ch == '\'') + { + isInQuote = true; + quoteChar = ch; + continue; + } + + if (currentLine.COMPARE(i, 2, "//") == 0) + break; + + if (currentLine.COMPARE(i, 2, "/*") == 0) + { + isInComment = true; + ++i; + continue; + } + + if (ch == '{') + ++bracketCount; + else if (ch == '}') + --bracketCount; + + if(bracketCount == 0) + return true; + } + + return false; + } + + + /** + * check if one of a set of 
headers has been reached in the + * current position of the current line. + * + * @return a pointer to the found header. Or a NULL if no header has been reached. + * @param headers a vector of headers + * @param checkBoundry + */ + const string *ASFormatter::findHeader(const vector<const string*> &headers, bool checkBoundry) + { + return ASBeautifier::findHeader(currentLine, charNum, headers, checkBoundry); + } + + + +#ifdef USES_NAMESPACE +} +#endif +/* + * Copyright (c) 1998,1999,2000,2001,2002 Tal Davidson. All rights reserved. + * + * compiler_defines.h (1 January 1999) + * by Tal Davidson (davidsont@bigfoot.com) + * This file is a part of "Artistic Style" - an indentater and reformatter + * of C, C++, C# and Java source files. + * + * The "Artistic Style" project, including all files needed to compile it, + * is free software; you can redistribute it and/or use it and/or modify it + * under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, + * or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU General Public + * License along with this program. 
+ */ + + +#ifndef ASFORMATTER_H +#define ASFORMATTER_H + +#include "ASBeautifier.h" +//#include "enums.h" +#include "compiler_defines.h" + +namespace astyle { + + class ASFormatter : public ASBeautifier + { + public: + ASFormatter(); + virtual ~ASFormatter(); + virtual void init(ASSourceIterator* iter); + virtual bool hasMoreLines() const; + virtual string nextLine(); + void setBracketFormatMode(BracketMode mode); + void setBreakClosingHeaderBracketsMode(bool state); + void setOperatorPaddingMode(bool mode); + void setParenthesisPaddingMode(bool mode); + void setBreakOneLineBlocksMode(bool state); + void setSingleStatementsMode(bool state); + void setTabSpaceConversionMode(bool state); + void setBreakBlocksMode(bool state); + void setBreakClosingHeaderBlocksMode(bool state); + void setBreakElseIfsMode(bool state); + + private: + void ASformatter(ASFormatter ©); // not to be imlpemented + void operator=(ASFormatter&); // not to be implemented + void staticInit(); + bool isFormattingEnabled() const; + void goForward(int i); + bool getNextChar(); + char peekNextChar() const; + bool isBeforeComment() const; + void trimNewLine(); + BracketType getBracketType() const; + bool isPointerOrReference() const; + bool isUrinaryMinus() const; + bool isInExponent() const; + bool isOneLineBlockReached() const; + void appendChar(char ch, bool canBreakLine = true); + void appendCurrentChar(bool canBreakLine = true); + void appendSequence(const string &sequence, bool canBreakLine = true); + void appendSpacePad(); + void breakLine(); + inline bool isSequenceReached(const string &sequence) const; + const string *findHeader(const vector<const string*> &headers, bool checkBoundry = true); + + static vector<const string*> headers; + static vector<const string*> nonParenHeaders; + static vector<const string*> preprocessorHeaders; + static vector<const string*> preDefinitionHeaders; + static vector<const string*> preCommandHeaders; + static vector<const string*> operators; + static 
vector<const string*> assignmentOperators; + static bool calledInitStatic; + + ASSourceIterator *sourceIterator; + vector<const string*> *preBracketHeaderStack; + vector<BracketType> *bracketTypeStack; + vector<int> *parenStack; + string readyFormattedLine; + string currentLine; + string formattedLine; + const string *currentHeader; + const string *previousOperator; + char currentChar; + char previousChar; + char previousNonWSChar; + char previousCommandChar; + char quoteChar; + unsigned int charNum; + BracketMode bracketFormatMode; + bool isVirgin; + bool shouldPadOperators; + bool shouldPadParenthesies; + bool shouldConvertTabs; + bool isInLineComment; + bool isInComment; + bool isInPreprocessor; + bool isInTemplate; // true both in template definitions (e.g. template<class A>) and template usage (e.g. F<int>). + bool doesLineStartComment; + bool isInQuote; + bool isSpecialChar; + bool isNonParenHeader; + bool foundQuestionMark; + bool foundPreDefinitionHeader; + bool foundPreCommandHeader; + bool isInLineBreak; + bool isInClosingBracketLineBreak; + bool endOfCodeReached; + bool isLineReady; + bool isPreviousBracketBlockRelated; + bool isInPotentialCalculation; + //bool foundOneLineBlock; + bool shouldBreakOneLineBlocks; + bool shouldReparseCurrentChar; + bool shouldBreakOneLineStatements; + bool shouldBreakLineAfterComments; + bool shouldBreakClosingHeaderBrackets; + bool shouldBreakElseIfs; + bool passedSemicolon; + bool passedColon; + bool isImmediatelyPostComment; + bool isImmediatelyPostLineComment; + bool isImmediatelyPostEmptyBlock; + + bool shouldBreakBlocks; + bool shouldBreakClosingHeaderBlocks; + bool isPrependPostBlockEmptyLineRequested; + bool isAppendPostBlockEmptyLineRequested; + + bool prependEmptyLine; + bool foundClosingHeader; + int previousReadyFormattedLineLength; + + bool isInHeader; + bool isImmediatelyPostHeader; + + }; + +} + +#endif +/* +* Copyright (c) 1998,1999,2000,2001,2002 Tal Davidson. All rights reserved. 
+* +* ASResource.cpp +* by Tal Davidson (davidsont@bigfoot.com) +* This file is a part of "Artistic Style" - an indentater and reformatter +* of C, C, C# and Java source files. +* + * The "Artistic Style" project, including all files needed to compile it, + * is free software; you can redistribute it and/or use it and/or modify it + * under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, + * or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU General Public + * License along with this program. +*/ + +#include "compiler_defines.h" +#include "ASResource.h" + +#include <string> + + +#ifdef USES_NAMESPACE +using namespace std; + +namespace astyle + { +#endif + + const string ASResource::AS_IF = string("if"); + const string ASResource::AS_ELSE = string ("else"); + const string ASResource::AS_FOR = string("for"); + const string ASResource::AS_DO = string("do"); + const string ASResource::AS_WHILE = string("while"); + const string ASResource::AS_SWITCH = string ("switch"); + const string ASResource::AS_CASE = string ("case"); + const string ASResource::AS_DEFAULT = string("default"); + const string ASResource::AS_CLASS = string("class"); + const string ASResource::AS_STRUCT = string("struct"); + const string ASResource::AS_UNION = string("union"); + const string ASResource::AS_INTERFACE = string("interface"); + const string ASResource::AS_NAMESPACE = string("namespace"); + const string ASResource::AS_EXTERN = string("extern"); + const string ASResource::AS_PUBLIC = string("public"); + const string ASResource::AS_PROTECTED = string("protected"); + const string ASResource::AS_PRIVATE = string("private"); + const string ASResource::AS_STATIC = string("static"); + 
const string ASResource::AS_SYNCHRONIZED = string("synchronized"); + const string ASResource::AS_OPERATOR = string("operator"); + const string ASResource::AS_TEMPLATE = string("template"); + const string ASResource::AS_TRY = string("try"); + const string ASResource::AS_CATCH = string("catch"); + const string ASResource::AS_FINALLY = string("finally"); + const string ASResource::AS_THROWS = string("throws"); + const string ASResource::AS_CONST = string("const"); + + const string ASResource::AS_ASM = string("asm"); + + const string ASResource::AS_BAR_DEFINE = string("#define"); + const string ASResource::AS_BAR_INCLUDE = string("#include"); + const string ASResource::AS_BAR_IF = string("#if"); + const string ASResource::AS_BAR_EL = string("#el"); + const string ASResource::AS_BAR_ENDIF = string("#endif"); + + const string ASResource::AS_OPEN_BRACKET = string("{"); + const string ASResource::AS_CLOSE_BRACKET = string("}"); + const string ASResource::AS_OPEN_LINE_COMMENT = string("//"); + const string ASResource::AS_OPEN_COMMENT = string("/*"); + const string ASResource::AS_CLOSE_COMMENT = string("*/"); + + const string ASResource::AS_ASSIGN = string("="); + const string ASResource::AS_PLUS_ASSIGN = string("+="); + const string ASResource::AS_MINUS_ASSIGN = string("-="); + const string ASResource::AS_MULT_ASSIGN = string("*="); + const string ASResource::AS_DIV_ASSIGN = string("/="); + const string ASResource::AS_MOD_ASSIGN = string("%="); + const string ASResource::AS_OR_ASSIGN = string("|="); + const string ASResource::AS_AND_ASSIGN = string("&="); + const string ASResource::AS_XOR_ASSIGN = string("^="); + const string ASResource::AS_GR_GR_ASSIGN = string(">>="); + const string ASResource::AS_LS_LS_ASSIGN = string("<<="); + const string ASResource::AS_GR_GR_GR_ASSIGN = string(">>>="); + const string ASResource::AS_LS_LS_LS_ASSIGN = string("<<<="); + const string ASResource::AS_RETURN = string("return"); + + const string ASResource::AS_EQUAL = string("=="); + const 
string ASResource::AS_PLUS_PLUS = string("++"); + const string ASResource::AS_MINUS_MINUS = string("--"); + const string ASResource::AS_NOT_EQUAL = string("!="); + const string ASResource::AS_GR_EQUAL = string(">="); + const string ASResource::AS_GR_GR = string(">>"); + const string ASResource::AS_GR_GR_GR = string(">>>"); + const string ASResource::AS_LS_EQUAL = string("<="); + const string ASResource::AS_LS_LS = string("<<"); + const string ASResource::AS_LS_LS_LS = string("<<<"); + const string ASResource::AS_ARROW = string("->"); + const string ASResource::AS_AND = string("&&"); + const string ASResource::AS_OR = string("||"); + const string ASResource::AS_COLON_COLON = string("::"); + const string ASResource::AS_PAREN_PAREN = string("()"); + const string ASResource::AS_BLPAREN_BLPAREN = string("[]"); + + const string ASResource::AS_PLUS = string("+"); + const string ASResource::AS_MINUS = string("-"); + const string ASResource::AS_MULT = string("*"); + const string ASResource::AS_DIV = string("/"); + const string ASResource::AS_MOD = string("%"); + const string ASResource::AS_GR = string(">"); + const string ASResource::AS_LS = string("<"); + const string ASResource::AS_NOT = string("!"); + const string ASResource::AS_BIT_OR = string("|"); + const string ASResource::AS_BIT_AND = string("&"); + const string ASResource::AS_BIT_NOT = string("~"); + const string ASResource::AS_BIT_XOR = string("^"); + const string ASResource::AS_QUESTION = string("?"); + const string ASResource::AS_COLON = string(":"); + const string ASResource::AS_COMMA = string(","); + const string ASResource::AS_SEMICOLON = string(";"); + + const string ASResource::AS_FOREACH = string("foreach"); + const string ASResource::AS_LOCK = string("lock"); + const string ASResource::AS_UNSAFE = string("unsafe"); + const string ASResource::AS_FIXED = string("fixed"); + const string ASResource::AS_GET = string("get"); + const string ASResource::AS_SET = string("set"); + const string ASResource::AS_ADD = 
string("add"); + const string ASResource::AS_REMOVE = string("remove"); + +#ifdef USES_NAMESPACE +} +#endif + + +/* + * Copyright (c) 1998,1999,2000,2001,2002 Tal Davidson. All rights reserved. + * + * compiler_defines.h (1 January 1999) + * by Tal Davidson (davidsont@bigfoot.com) + * This file is a part of "Artistic Style" - an indentater and reformatter + * of C, C++, C# and Java source files. + * + * The "Artistic Style" project, including all files needed to compile it, + * is free software; you can redistribute it and/or use it and/or modify it + * under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, + * or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU General Public + * License along with this program. 
+ */ + + +#ifndef ASRES_H +#define ASRES_H + +#include "compiler_defines.h" +#include "ASStreamIterator.h" + +#include <iostream> +#include <fstream> +#include <string> + +namespace astyle { + +class ASResource + { + public: + static const string AS_IF, AS_ELSE; + static const string AS_DO, AS_WHILE; + static const string AS_FOR; + static const string AS_SWITCH, AS_CASE, AS_DEFAULT; + static const string AS_TRY, AS_CATCH, AS_THROWS, AS_FINALLY; + static const string AS_PUBLIC, AS_PROTECTED, AS_PRIVATE; + static const string AS_CLASS, AS_STRUCT, AS_UNION, AS_INTERFACE, AS_NAMESPACE, AS_EXTERN; + static const string AS_STATIC; + static const string AS_CONST; + static const string AS_SYNCHRONIZED; + static const string AS_OPERATOR, AS_TEMPLATE; + static const string AS_OPEN_BRACKET, AS_CLOSE_BRACKET; + static const string AS_OPEN_LINE_COMMENT, AS_OPEN_COMMENT, AS_CLOSE_COMMENT; + static const string AS_BAR_DEFINE, AS_BAR_INCLUDE, AS_BAR_IF, AS_BAR_EL, AS_BAR_ENDIF; + static const string AS_RETURN; + static const string AS_ASSIGN, AS_PLUS_ASSIGN, AS_MINUS_ASSIGN, AS_MULT_ASSIGN; + static const string AS_DIV_ASSIGN, AS_MOD_ASSIGN, AS_XOR_ASSIGN, AS_OR_ASSIGN, AS_AND_ASSIGN; + static const string AS_GR_GR_ASSIGN, AS_LS_LS_ASSIGN, AS_GR_GR_GR_ASSIGN, AS_LS_LS_LS_ASSIGN; + static const string AS_EQUAL, AS_PLUS_PLUS, AS_MINUS_MINUS, AS_NOT_EQUAL, AS_GR_EQUAL, AS_GR_GR_GR, AS_GR_GR; + static const string AS_LS_EQUAL, AS_LS_LS_LS, AS_LS_LS, AS_ARROW, AS_AND, AS_OR; + static const string AS_COLON_COLON, AS_PAREN_PAREN, AS_BLPAREN_BLPAREN; + static const string AS_PLUS, AS_MINUS, AS_MULT, AS_DIV, AS_MOD, AS_GR, AS_LS; + static const string AS_NOT, AS_BIT_XOR, AS_BIT_OR, AS_BIT_AND, AS_BIT_NOT; + static const string AS_QUESTION, AS_COLON, AS_SEMICOLON, AS_COMMA; + static const string AS_ASM; + static const string AS_FOREACH, AS_LOCK, AS_UNSAFE, AS_FIXED; + static const string AS_GET, AS_SET, AS_ADD, AS_REMOVE; + }; +} +#endif + +#ifndef ASSOURCEITERATOR_H +#define 
ASSOURCEITERATOR_H + +#include <string> +#include "compiler_defines.h" + +namespace astyle + { + + class ASSourceIterator + { + public: + virtual bool hasMoreLines() const = 0; + virtual std::string nextLine() = 0; + }; +} + +#endif +#include "compiler_defines.h"
+#include "ASStreamIterator.h"
+
+#include <iostream>
+#include <fstream>
+#include <string>
+
+using namespace astyle;
+
+// Builds an iterator over the given input stream. Only the pointer is
+// stored here; the destructor of this class deletes it, so the stream
+// must be one that may be released with `delete`.
+ASStreamIterator::ASStreamIterator(istream *in)
+    : inStream(in)
+{}
+
+// Releases the wrapped stream: the pointer handed to the constructor is
+// deleted here, so the caller must not delete it as well.
+ASStreamIterator::~ASStreamIterator()
+{ delete inStream; }
+
+
+// Returns true while the wrapped stream is still usable, i.e. neither
+// failbit nor badbit is set on it. This is the same test the stream's
+// boolean conversion performs.
+bool ASStreamIterator::hasMoreLines() const
+{
+    return !inStream->fail();
+}
+
+/*
+string ASStreamIterator::nextLine()
+{
+ char theInChar;
+ char peekedChar;
+ int theBufferPosn = 0;
+
+ //
+ // treat '\n', '\r', '\n\r' and '\r\n' as an endline.
+ //
+ while (theBufferPosn < 2047 && inStream->get(theInChar))
+ // while not eof
+ {
+ if (theInChar != '\n' && theInChar != '\r')
+ {
+ buffer[theBufferPosn] = theInChar;
+ theBufferPosn++;
+ }
+ else
+ {
+ peekedChar = inStream->peek();
+ if (peekedChar != theInChar && (peekedChar == '\r' || peekedChar == '\n') )
+ {
+ inStream->get(theInChar);
+ }
+ break;
+ }
+ }
+ buffer[theBufferPosn] = '\0';
+
+ return string(buffer);
+}
+*/
+
+
+// Reads the next line from the wrapped stream and returns it with every
+// carriage return ('\r') removed, so "\r\n" input yields the same text as
+// "\n" input. The terminating '\n' is consumed and not returned.
+//
+// Returns an empty string when nothing could be read (for example at end
+// of input); in that case the stream's failbit is set and hasMoreLines()
+// reports false afterwards.
+string ASStreamIterator::nextLine()
+{
+    // Read into a std::string rather than the fixed-size member buffer:
+    // istream::getline(buf, n) sets failbit as soon as a line does not fit
+    // into the buffer, which made hasMoreLines() return false and silently
+    // dropped the rest of the input after one over-long line.
+    string rawLine;
+    std::getline(*inStream, rawLine);
+
+    // Copy everything except carriage returns.
+    string cleanLine;
+    cleanLine.reserve(rawLine.length());
+    for (string::size_type pos = 0; pos < rawLine.length(); ++pos)
+    {
+        const char ch = rawLine[pos];
+        if (ch != '\r')
+            cleanLine += ch;
+    }
+
+    return cleanLine;
+}
+
+/* + * Copyright (c) 1998,1999,2000,2001,2002 Tal Davidson. All rights reserved. + * + * compiler_defines.h (1 January 1999) + * by Tal Davidson (davidsont@bigfoot.com) + * This file is a part of "Artistic Style" - an indentater and reformatter + * of C, C++, C# and Java source files. + * + * The "Artistic Style" project, including all files needed to compile it, + * is free software; you can redistribute it and/or use it and/or modify it + * under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, + * or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU General Public + * License along with this program. + */ + + +#ifndef ASSTREAMITERATOR_H +#define ASSTREAMITERATOR_H + +#include "ASSourceIterator.h" + +using namespace std; + +namespace astyle + { + class ASStreamIterator : + public ASSourceIterator + { + public: + ASStreamIterator(istream *in); + virtual ~ASStreamIterator(); + bool hasMoreLines() const; + string nextLine(); + + private: + istream * inStream; + char buffer[2048]; + }; + +} + +#endif +/*************************************************************************** + charcodes.cpp - description + ------------------- + begin : Wed Nov 24 2003 + copyright : (C) 2003 by André imon + email : andre.simon1@gmx.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
* + * * + ***************************************************************************/ + +// FILE SHOULD BE REMOVED FROM PROJECT + +#ifndef CHAR_CODES +#define CHAR_CODES + +#ifdef _WIN32 + +#define AUML_LC 228 +#define OUML_LC 246 +#define UUML_LC 252 + +#define AUML_UC 196 +#define OUML_UC 214 +#define UUML_UC 220 + + +#define AACUTE_LC 225 +#define EACUTE_LC 233 +#define OACUTE_LC 243 +#define UACUTE_LC 250 + +#define AACUTE_UC 193 +#define EACUTE_UC 201 +#define OACUTE_UC 211 +#define UACUTE_UC 218 + +#define AGRAVE_LC 224 +#define EGRAVE_LC 232 +#define OGRAVE_LC 242 +#define UGRAVE_LC 249 + +#define AGRAVE_UC 192 +#define EGRAVE_UC 200 +#define OGRAVE_UC 210 +#define UGRAVE_UC 217 + +#define SZLIG 223 + +/* DOS CONSOLE CODES +#define AUML_LC 132 +#define OUML_LC 148 +#define UUML_LC 129 + +#define AUML_UC 142 +#define OUML_UC 153 +#define UUML_UC 154 + + +#define AACUTE_LC 160 +#define EACUTE_LC 130 +#define OACUTE_LC 162 +#define UACUTE_LC 163 + +#define AACUTE_UC 181 +#define EACUTE_UC 144 +#define OACUTE_UC 224 +#define UACUTE_UC 233 + +#define AGRAVE_LC 133 +#define EGRAVE_LC 138 +#define OGRAVE_LC 149 +#define UGRAVE_LC 151 + +#define AGRAVE_UC 183 +#define EGRAVE_UC 212 +#define OGRAVE_UC 227 +#define UGRAVE_UC 235 + +#define SZLIG 225 +*/ + +#else + +#define AUML_LC 164 +#define OUML_LC 182 +#define UUML_LC 188 + +#define AUML_UC 132 +#define OUML_UC 150 +#define UUML_UC 156 + + +#define AACUTE_LC 161 +#define EACUTE_LC 169 +#define OACUTE_LC 179 +#define UACUTE_LC 186 + +#define AACUTE_UC 129 +#define EACUTE_UC 137 +#define OACUTE_UC 147 +#define UACUTE_UC 154 + +#define AGRAVE_LC 160 +#define EGRAVE_LC 168 +#define OGRAVE_LC 178 +#define UGRAVE_LC 185 + +#define AGRAVE_UC 128 +#define EGRAVE_UC 136 +#define OGRAVE_UC 146 +#define UGRAVE_UC 153 + +#define SZLIG 159 + +#endif + +#endif +/*************************************************************************** + cmdlineoptions.cpp - description + ------------------- + begin : Sun Nov 25 2001 + 
copyright : (C) 2001 by André Simon + email : andre.simon1@gmx.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include "cmdlineoptions.h" + +using namespace std; + +/* Siehe man getopt (3) + Konstruktor legt Optionen und Argumente fest +*/ +CmdLineOptions::CmdLineOptions(int argc, char *argv[]): + numberSpaces(0), + wrappingStyle(highlight::WRAP_DISABLED), + outputType (highlight::HTML), + opt_language (false), + opt_include_style (false), + opt_help (false), + opt_version (false), + opt_verbose (false), + opt_linenumbers (false), + opt_style (false), + opt_batch_mode (false), + opt_fragment (false) , + opt_attach_line_anchors (false), + opt_show_themes (false), + opt_show_langdefs (false), + opt_printindex(false), + opt_quiet(false), + opt_xslfo_fop(false), + opt_replacequotes(false), + opt_print_progress(false), + opt_fill_zeroes(false), + opt_stylepath_explicit(false), + opt_force_output(false), + configFileRead(false), + helpLang("en"), + charset("ISO-8859-1") +{ + + loadConfigurationFile(); + + int c, option_index = 0; + static struct option long_options[] = + { + {OPT_OUT, 1, 0, S_OPT_OUT}, + {OPT_IN, 1, 0, S_OPT_IN}, + {OPT_SYNTAX, 1, 0, S_OPT_SYNTAX}, + {OPT_VERBOSE, 0, 0, S_OPT_VERBOSE}, + {OPT_INC_STYLE, 0, 0, S_OPT_INC_STYLE}, + {OPT_HELP, 0, 0, S_OPT_HELP}, + {OPT_HELPINT, 1, 0, S_OPT_HELPINT}, + {OPT_LINENO,0,0,S_OPT_LINENO}, + {OPT_STYLE, 1,0,S_OPT_STYLE}, + {OPT_STYLE_OUT, 1, 0,S_OPT_STYLE_OUT}, + {OPT_STYLE_IN, 1, 0,S_OPT_STYLE_IN}, + {OPT_DELTABS,1,0,S_OPT_DELTABS}, + {OPT_XHTML, 
0,0,S_OPT_XHTML}, + {OPT_RTF, 0,0,S_OPT_RTF}, + {OPT_TEX,0, 0,S_OPT_TEX}, + {OPT_LATEX,0, 0,S_OPT_LATEX}, + {OPT_XSLFO,0, 0,S_OPT_XSLFO}, + {OPT_ANSI,0, 0,S_OPT_ANSI}, + {OPT_XML,0, 0,S_OPT_XML}, + {OPT_BATCHREC,1,0,S_OPT_BATCHREC}, + {OPT_FRAGMENT,0,0,S_OPT_FRAGMENT}, + {OPT_ANCHORS, 0,0,S_OPT_ANCHORS }, + {OPT_LISTTHEMES, 0,0,S_OPT_LISTTHEMES }, + {OPT_LISTLANGS, 0,0,S_OPT_LISTLANGS }, + {OPT_OUTDIR,1,0,S_OPT_OUTDIR}, + {OPT_VERSION,0,0,0}, + {OPT_FORMATSTYLE,1,0,S_OPT_FORMATSTYLE}, + {OPT_DATADIR,1,0,S_OPT_DATADIR}, + {OPT_ADDDATADIR,1,0,S_OPT_ADDDATADIR}, + {OPT_INDEXFILE,0,0,S_OPT_INDEXFILE}, + {OPT_WRAP,0,0,S_OPT_WRAP}, + {OPT_WRAPSIMPLE,0,0,S_OPT_WRAPSIMPLE}, + {OPT_QUIET,0,0,S_OPT_QUIET}, + {OPT_REPLACE_QUOTES,0,0,S_OPT_REPLACE_QUOTES}, + {OPT_PROGRESSBAR,0,0,S_OPT_PROGRESSBAR}, + {OPT_FILLZEROES,0,0,S_OPT_FILLZEROES}, + {OPT_ENCODING,1,0,S_OPT_ENCODING}, + + //remove as soon as APAche fixes the bug in FOP (0.20.5) + {OPT_FOP,0,0,S_OPT_FOP}, + + //deprecated + {OPT_CSSOUT,1,0,0}, + {OPT_CSSIN,1,0,0}, + {OPT_INC_CSS,0,0,0}, + {OPT_FORCE_OUTPUT,0,0,0}, + + {0, 0, 0, 0} + }; + + while (1) + { + c = getopt_long (argc, argv,S_OPTIONS_STRING,long_options, &option_index); + if (c == -1) + break; + + switch (c) + { + case 0: // long options + if (long_options[option_index].name==OPT_VERSION) { + opt_version = true; + } + if (long_options[option_index].name==OPT_CSSOUT) { + styleOutFilename=string(optarg); + printDeprecatedWarning(OPT_CSSOUT, OPT_STYLE_OUT); + } + if (long_options[option_index].name==OPT_CSSIN) { + styleInFilename=string(optarg); + printDeprecatedWarning(OPT_CSSIN, OPT_STYLE_IN); + } + if (long_options[option_index].name==OPT_INC_CSS) { + opt_include_style = true; + printDeprecatedWarning(OPT_INC_CSS, OPT_INC_STYLE); + } + if (long_options[option_index].name==OPT_FORCE_OUTPUT) { + opt_force_output = true; + } + break; + case S_OPT_OUT: + outFilename=string(optarg); + break; + case S_OPT_IN: + inputFileNames.push_back(string(optarg)); + break; + case 
S_OPT_STYLE_OUT: + styleOutFilename=string(optarg); + opt_stylepath_explicit=true; + break; + case S_OPT_STYLE_IN: + styleInFilename=string(optarg); + break; + case S_OPT_VERBOSE: + opt_verbose = true; + break; + case S_OPT_QUIET: + opt_quiet = true; + break; + case S_OPT_INC_STYLE: + opt_include_style = true; + break; + case S_OPT_HELPINT: + helpLang=string(optarg); + case S_OPT_HELP: + opt_help = true; + break; + case S_OPT_LINENO: + opt_linenumbers = true; + break; + case '?': + //opt_help = true; + break; + case S_OPT_STYLE: + styleName=string(optarg); + opt_style = true; + break; + case S_OPT_SYNTAX: + language=string(optarg); + opt_language = true; + break; + case S_OPT_DELTABS: + numberSpaces = StringTools::str2int (string(optarg)); + break; + case S_OPT_XHTML: + outputType=highlight::XHTML; + break; + case S_OPT_RTF: + outputType=highlight::RTF; + break; + case S_OPT_TEX: + outputType=highlight::TEX; + break; + case S_OPT_LATEX: + outputType=highlight::LATEX; + break; + case S_OPT_XSLFO: + outputType=highlight::XSLFO; + break; + case S_OPT_ANSI: + outputType=highlight::ANSI; + break; + case S_OPT_XML: + outputType=highlight::XML; + break; + case S_OPT_BATCHREC: + opt_batch_mode = true; + readDirectory(string(optarg)); + break; + case S_OPT_FRAGMENT: + opt_fragment = true; + break; + case S_OPT_ANCHORS: + opt_attach_line_anchors = true; + break; + case S_OPT_LISTTHEMES: + opt_show_themes = true; + break; + case S_OPT_LISTLANGS: + opt_show_langdefs = true; + break; + case S_OPT_OUTDIR: + outDirectory = validateDirPath(string(optarg)); + break; + case S_OPT_FORMATSTYLE: + indentScheme =string(optarg); + break; + case S_OPT_ENCODING: + charset =string(optarg); + break; + case S_OPT_DATADIR: + dataDir=validateDirPath(string(optarg)); + break; + case S_OPT_ADDDATADIR: + additionalDataDir=validateDirPath(string(optarg)); + break; + case S_OPT_INDEXFILE: + opt_printindex=true; + break; + case S_OPT_WRAPSIMPLE: + wrappingStyle = highlight::WRAP_SIMPLE; + break; + 
case S_OPT_WRAP: + wrappingStyle = highlight::WRAP_DEFAULT; + break; + case S_OPT_FOP: + opt_xslfo_fop=true; + break; + case S_OPT_REPLACE_QUOTES: + opt_replacequotes=true; + break; + case S_OPT_PROGRESSBAR: + opt_print_progress=true; + break; + case S_OPT_FILLZEROES: + opt_fill_zeroes=true; + break; + default: + cerr <<"higlight: Unknown option " <<c<< endl; + } + } + + if (optind < argc) //still args left + { + if (inputFileNames.empty()) { + while (optind < argc){ + inputFileNames.push_back(string(argv[optind++])); + } + } + } else if (inputFileNames.empty()) { + inputFileNames.push_back(""); + } + if (printDebugInfo() && configFileRead) { + cout << "Configuration file \""<<configFilePath<<"\" was read.\n"; + } +} + +CmdLineOptions::~CmdLineOptions(){ +} + +const string &CmdLineOptions::getSingleOutFilename() + { + if (!inputFileNames.empty() && !outDirectory.empty()) { + if (outFilename.empty()) { + outFilename = outDirectory; + int delim = getSingleInFilename().find_last_of(Platform::pathSeparator)+1; + outFilename += getSingleInFilename().substr((delim>-1)?delim:0) + + getOutFileSuffix(); + } + } + return outFilename; + } + +const string &CmdLineOptions::getSingleInFilename() const + { + return inputFileNames[0]; + } + +const string &CmdLineOptions::getOutDirectory() + { + if (!outFilename.empty() && !enableBatchMode()){ + outDirectory=getDirName(outFilename); + } + return outDirectory; + } + +const string CmdLineOptions::getStyleOutFilename() const + { + if (!styleOutFilename.empty()) return styleOutFilename; + return (outputType==highlight::HTML || + outputType==highlight::XHTML)? 
"highlight.css":"highlight.sty"; + } +const string &CmdLineOptions::getStyleInFilename() const + { + return styleInFilename; + } +int CmdLineOptions::getNumberSpaces() const + { + return numberSpaces; + } +bool CmdLineOptions::printVersion()const + { + return opt_version; + } +bool CmdLineOptions::printHelp()const + { + return opt_help; + } +bool CmdLineOptions::printDebugInfo()const + { + return opt_verbose; + } +bool CmdLineOptions::quietMode()const + { + return opt_quiet; + } +bool CmdLineOptions::includeStyleDef()const + { + return opt_include_style; + } + +bool CmdLineOptions::formatSupportsExtStyle(){ + return outputType==highlight::HTML || + outputType==highlight::XHTML || + outputType==highlight::LATEX || + outputType==highlight::TEX; +} + +bool CmdLineOptions::printLineNumbers()const + { + return opt_linenumbers; + } + +string CmdLineOptions::getStyleName()const + { + return ( ( opt_style) ? styleName+".style" : "kwrite.style" ); + } +bool CmdLineOptions::enableBatchMode()const{ + return inputFileNames.size()>1 || opt_batch_mode; +} +bool CmdLineOptions::fragmentOutput()const{ + return opt_fragment; +} +string CmdLineOptions::getOutFileSuffix()const{ + switch (outputType){ + case highlight::XHTML: return ".xhtml"; + case highlight::RTF: return ".rtf"; + case highlight::TEX: + case highlight::LATEX: return ".tex"; + case highlight::XSLFO: return ".fo"; + case highlight::XML: return ".xml"; + default: return ".html"; + } +} +string CmdLineOptions::getDirName(const string & path) { + size_t dirNameLength=path.rfind(Platform::pathSeparator); + return (dirNameLength==string::npos)?string():path.substr(0, dirNameLength+1); +} +bool CmdLineOptions::attachLineAnchors()const{ + return opt_attach_line_anchors; +} +bool CmdLineOptions::showThemes()const{ + return opt_show_themes; +} +bool CmdLineOptions::showLangdefs()const{ + return opt_show_langdefs; +} +bool CmdLineOptions::outDirGiven()const{ + return !outFilename.empty(); +} +bool CmdLineOptions::fopCompatible() 
const { + return opt_xslfo_fop; +} +bool CmdLineOptions::replaceQuotes() const { + return opt_replacequotes; +} +bool CmdLineOptions::getFlag( const string& paramVal){ + return StringTools::lowerCase(paramVal)=="true"; +} +bool CmdLineOptions::formattingEnabled(){ + return !indentScheme.empty(); +} +bool CmdLineOptions::dataDirGiven()const { + return !dataDir.empty(); +} +bool CmdLineOptions::additionalDataDirGiven()const { + return !additionalDataDir.empty(); +} +const string &CmdLineOptions::getDataDir() const { + return dataDir; +} +const string &CmdLineOptions::getIndentScheme() const { + return indentScheme; +} +const string &CmdLineOptions::getAdditionalDataDir()const{ + return additionalDataDir; +} +const string &CmdLineOptions::getLanguage() const { + return language; +} +const string&CmdLineOptions::getCharSet() const{ + return charset; +} +bool CmdLineOptions::printIndexFile() const{ + return opt_printindex && (outputType==highlight::HTML || + outputType==highlight::XHTML); +} +bool CmdLineOptions::printProgress() const{ + return opt_print_progress; +} +bool CmdLineOptions::fillLineNrZeroes() const{ + return opt_fill_zeroes; +} +bool CmdLineOptions::syntaxGiven() const{ + return opt_language; +} +bool CmdLineOptions::omitEncodingName() const{ + return StringTools::lowerCase(charset)=="none"; +} +bool CmdLineOptions::forceOutput() const{ + return opt_force_output; +} +const string CmdLineOptions::getHelpLang()const{ + return helpLang+".help"; +} +highlight::WrapMode CmdLineOptions::getWrappingStyle() const { + return wrappingStyle; +} +const vector <string> & CmdLineOptions::getInputFileNames() const{ + return inputFileNames; +} +void CmdLineOptions::readDirectory(const string & wildcard){ + // get matching files, use recursive search + bool directoryOK=Platform::getDirectoryEntries(inputFileNames, wildcard, true); + if (!directoryOK) + { + cerr << "highlight: No files matched the pattern \"" + << wildcard << "\"."<< endl; + } +} +void 
CmdLineOptions::loadConfigurationFile() +{ + #ifndef _WIN32 + #ifdef CONFIG_FILE_PATH + configFilePath=CONFIG_FILE_PATH; + #else + char* homeEnv=getenv("HOME"); + if (homeEnv==NULL) return; + configFilePath=string(homeEnv)+"/.highlightrc"; + #endif + #else + configFilePath = Platform::getAppPath() + "highlight.conf"; + #endif + ConfigurationReader presets(configFilePath); + + if (presets.found()) + { + string paramVal; + configFileRead=true; + + styleOutFilename = presets.getParameter(OPT_STYLE_OUT); + styleInFilename = presets.getParameter(OPT_STYLE_IN); + styleName = presets.getParameter(OPT_STYLE); + opt_style = !styleName.empty(); + language = presets.getParameter(OPT_SYNTAX); + opt_language = !language.empty(); + numberSpaces = StringTools::str2int(presets.getParameter(OPT_DELTABS)); + indentScheme = presets.getParameter(OPT_FORMATSTYLE); + + paramVal = presets.getParameter(OPT_DATADIR); + if (!paramVal.empty()) { + dataDir=validateDirPath( paramVal); + } + paramVal = presets.getParameter(OPT_ADDDATADIR); + if (!paramVal.empty()) { + additionalDataDir=validateDirPath(paramVal); + } + paramVal = presets.getParameter(OPT_OUTDIR); + if (!paramVal.empty()) { + outDirectory=validateDirPath(paramVal); + } + paramVal = presets.getParameter(OPT_ENCODING); + if (!paramVal.empty()) { + charset=paramVal; + } + + opt_include_style=getFlag(presets.getParameter(OPT_INC_STYLE)); + opt_verbose=getFlag(presets.getParameter(OPT_VERBOSE)); + opt_linenumbers=getFlag(presets.getParameter(OPT_LINENO)); + opt_fragment=getFlag(presets.getParameter(OPT_FRAGMENT)); + opt_attach_line_anchors=getFlag(presets.getParameter(OPT_ANCHORS)); + opt_printindex=getFlag(presets.getParameter(OPT_INDEXFILE)); + opt_quiet=getFlag(presets.getParameter(OPT_QUIET)); + opt_xslfo_fop=getFlag(presets.getParameter(OPT_FOP)); + opt_replacequotes=getFlag(presets.getParameter(OPT_REPLACE_QUOTES)); + opt_print_progress=getFlag(presets.getParameter(OPT_PROGRESSBAR)); + 
opt_fill_zeroes=getFlag(presets.getParameter(OPT_FILLZEROES)); + + if (getFlag(presets.getParameter(OPT_WRAP))) { + wrappingStyle=highlight::WRAP_DEFAULT; + } + if (getFlag(presets.getParameter(OPT_WRAPSIMPLE))) { + wrappingStyle=highlight::WRAP_SIMPLE; + } + if (getFlag(presets.getParameter(OPT_XHTML))) { + outputType=highlight::XHTML; + } else if (getFlag(presets.getParameter(OPT_RTF))) { + outputType=highlight::RTF; + } else if (getFlag(presets.getParameter(OPT_TEX))) { + outputType=highlight::TEX; + } else if (getFlag(presets.getParameter(OPT_LATEX))) { + outputType=highlight::LATEX; + } else if (getFlag(presets.getParameter(OPT_XSLFO))) { + outputType=highlight::XSLFO; + } else if (getFlag(presets.getParameter(OPT_ANSI))) { + outputType=highlight::ANSI; + } else if (getFlag(presets.getParameter(OPT_XML))) { + outputType=highlight::XML; + } + } +} + +string CmdLineOptions::validateDirPath(const string & path){ + return (path[path.length()-1] !=Platform::pathSeparator)? + path+Platform::pathSeparator : path; +} + +highlight::OutputType CmdLineOptions::getOutputType() const { + return outputType; +} + +void CmdLineOptions::printDeprecatedWarning(const char *oldOption, const char *newOption){ + cerr << "Warning: Long option \""<<oldOption << "\" is DEPRECATED."; + cerr << " Use \""<<newOption << "\" instead.\n"; +} +/*************************************************************************** + cmdlineoptions.h - description + ------------------- + begin : Sun Nov 25 2001 + copyright : (C) 2001 by André Simon + email : andre.simon1@gmx.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
* + * * + ***************************************************************************/ + +#ifndef CMDLINEOPTIONS_H +#define CMDLINEOPTIONS_H + +#include <string> +#include <map> +#include <cstdlib> +#include <iostream> +#include <fstream> + +#include "platform_fs.h" +#include "configurationreader.h" +#include "datadir.h" +#include "enums.h" + +#ifdef _WIN32 + #include <windows.h> +#endif + +// If your system does not know getopt_long, define USE_LOCAL_GETOPT +#if defined(_WIN32) || defined(__SVR4) || defined(__sun__) + // some compilers don't like redefinitions... + #ifndef USE_LOCAL_GETOPT + #define USE_LOCAL_GETOPT + #endif +#endif + +#ifdef USE_LOCAL_GETOPT + #include "getopt.h" +#else + #include <getopt.h> +#endif + +#define OPT_VERBOSE "verbose" +#define OPT_INC_STYLE "include-style" +#define OPT_HELP "help" +#define OPT_LINENO "linenumbers" +#define OPT_XHTML "xhtml" +#define OPT_RTF "rtf" +#define OPT_TEX "tex" +#define OPT_LATEX "latex" +#define OPT_XSLFO "xsl-fo" +#define OPT_FRAGMENT "fragment" +#define OPT_ANCHORS "anchors" +#define OPT_LISTTHEMES "list-themes" +#define OPT_LISTLANGS "list-langs" +#define OPT_VERSION "version" +#define OPT_IN "input" +#define OPT_OUT "output" +#define OPT_SYNTAX "syntax" +#define OPT_STYLE "style" +#define OPT_STYLE_OUT "style-outfile" +#define OPT_STYLE_IN "style-infile" + +#define OPT_DELTABS "replace-tabs" +#define OPT_BATCHREC "batch-recursive" +#define OPT_OUTDIR "outdir" +#define OPT_FORMATSTYLE "format-style" +#define OPT_DATADIR "data-dir" +#define OPT_ADDDATADIR "add-data-dir" +#define OPT_INDEXFILE "print-index" +#define OPT_HELPINT "help-int" +#define OPT_WRAP "wrap" +#define OPT_WRAPSIMPLE "wrap-simple" +#define OPT_QUIET "quiet" +#define OPT_REPLACE_QUOTES "replace-quotes" +#define OPT_FOP "fop-compatible" +#define OPT_PROGRESSBAR "progress" +#define OPT_FILLZEROES "zeroes" +#define OPT_ANSI "ansi" +#define OPT_XML "xml" +#define OPT_ENCODING "encoding" +#define OPT_FORCE_OUTPUT "force" + +#define S_OPT_ANSI 
'A' +#define S_OPT_OUT 'o' +#define S_OPT_IN 'i' +#define S_OPT_SYNTAX 'S' +#define S_OPT_VERBOSE 'v' +#define S_OPT_INC_STYLE 'I' +#define S_OPT_HELP 'h' +#define S_OPT_HELPINT 'H' +#define S_OPT_LINENO 'l' +#define S_OPT_STYLE 's' +#define S_OPT_STYLE_OUT 'c' +#define S_OPT_STYLE_IN 'e' +#define S_OPT_DELTABS 't' +#define S_OPT_XHTML 'X' +#define S_OPT_RTF 'R' +#define S_OPT_TEX 'T' +#define S_OPT_LATEX 'L' +#define S_OPT_XSLFO 'Y' +#define S_OPT_XML 'Z' +#define S_OPT_BATCHREC 'B' +#define S_OPT_FRAGMENT 'f' +#define S_OPT_ANCHORS 'a' +#define S_OPT_LISTTHEMES 'w' +#define S_OPT_LISTLANGS 'p' +#define S_OPT_OUTDIR 'O' + +#define S_OPT_FORMATSTYLE 'F' +#define S_OPT_DATADIR 'D' +#define S_OPT_ADDDATADIR 'E' +#define S_OPT_INDEXFILE 'C' +#define S_OPT_WRAP 'W' +#define S_OPT_WRAPSIMPLE 'V' +#define S_OPT_QUIET 'q' +#define S_OPT_FOP 'g' +#define S_OPT_REPLACE_QUOTES 'r' +#define S_OPT_VERSION 'Q' +#define S_OPT_PROGRESSBAR 'P' +#define S_OPT_FILLZEROES 'z' +#define S_OPT_ENCODING 'u' + +// deprecated: +#define OPT_CSSOUT "css-outfile" +#define OPT_CSSIN "css-infile" +#define OPT_INC_CSS "include-css" + + +#define S_OPTIONS_STRING "o:i:S:B:O:s:c:e:t:u:F:D:H:E:afghlvwpqrzACILYRTZXUV::W::P" + +using namespace std; + +/**Command line options*/ + +class CmdLineOptions + { + public: + + /**Constructor + \param argc Argument count + \param argv Argument strings + */ + CmdLineOptions(int argc, char *argv[]); + ~CmdLineOptions(); + + /** \return Single output file name*/ + const string &getSingleOutFilename(); + + /** \return Single input file name*/ + const string &getSingleInFilename() const; + + /** \return Output directory*/ + const string& getOutDirectory() ; + + /** \return Style output file name*/ + const string getStyleOutFilename() const; + + /** \return Style input file name*/ + const string&getStyleInFilename() const; + + /** \return Char set*/ + const string&getCharSet() const; + + /** \return Number of spaces to replace a tab*/ + int getNumberSpaces() const; + 
+ /** \return True if version information should be printed*/ + bool printVersion() const; + + /** \return True if help information should be printed*/ + bool printHelp() const; + + /** \return True if debug information should be printed*/ + bool printDebugInfo()const; + + /** \return True if Style definition should be included in output*/ + bool includeStyleDef() const; + + /** \return True if line numbers should be printed*/ + bool printLineNumbers() const; + + /** \return colour theme name */ + string getStyleName()const ; + + /** gibt true zurck, falls deutsche Hilfe ausgegeben werden soll */ + int helpLanguage() const; + + /** \return True if batch mode is active*/ + bool enableBatchMode() const; + + /** \return True if output shluld be fragmented*/ + bool fragmentOutput() const; + + /** \return output file suffix */ + string getOutFileSuffix() const; + + /** \return True if anchors should be attached to line numbers*/ + bool attachLineAnchors() const; + + /** \return True if list of installed themes should be printed*/ + bool showThemes() const; + + /** \return True if list of installed language definitions should be printed*/ + bool showLangdefs() const; + + /** \return True if loutput directory is given*/ + bool outDirGiven() const; + + /** \return True if refomatting is enabled*/ + bool formattingEnabled(); + + /** \return True if a new data directory is given*/ + bool dataDirGiven()const; + + /** \return True if an additional data directory is given*/ + bool additionalDataDirGiven()const; + + /** \return True if index file should be printed*/ + bool printIndexFile() const; + + /** \return True if quotes should be replaced by /dq in LaTeX*/ + bool replaceQuotes() const; + + /** \return Data directory*/ + const string &getDataDir()const; + + /** \return Additional data directory*/ + const string &getAdditionalDataDir()const; + + /** \return True if language syntax is given*/ + bool syntaxGiven() const; + + /** \return True if quiet mode is active*/ + bool 
quietMode() const; + + /** \return True if XSL-FO output should be FOP compatible*/ + bool fopCompatible() const; + + /** \return True if progress bar should be printed in batch mode */ + bool printProgress() const; + + /** \return True if line numbers are filled with leading zeroes */ + bool fillLineNrZeroes() const; + + /** \return name of help message file*/ + const string getHelpLang() const; + + /** \return programming language */ + const string &getLanguage()const ; + + /** \return Wrapping style*/ + highlight::WrapMode getWrappingStyle() const; + + /** \return List of input file names*/ + const vector <string> & getInputFileNames() const; + + /** \return Name of indentation scheme file */ + const string &getIndentScheme() const; + + /** \return Output file format */ + highlight::OutputType getOutputType() const; + + /** \return True if chosen output format supports referenced style files */ + bool formatSupportsExtStyle(); + + /** \return True if style output path was defined by user*/ + bool styleOutPathDefined() const{ + return opt_stylepath_explicit; + } + + /** \return True if encoding nasme should be omitted in output*/ + bool omitEncodingName() const; + + /** \return True if output should be generated if languege type is unknown*/ + bool forceOutput() const; + + private: + + int numberSpaces; // number of spaces which replace a tab + highlight::WrapMode wrappingStyle; // line wrapping mode + highlight::OutputType outputType; + + // name of single output file + string outFilename, + // output directory + outDirectory, + // programming language which will be loaded + language, + // name of colour theme + styleName, + // name of external style file + styleOutFilename, + // name of file to be included in external style file + styleInFilename, + // used to define data directories at runtime + dataDir, additionalDataDir; + // name of indenation scheme + string indentScheme; + + bool opt_language; + bool opt_include_style; + bool opt_help; + bool opt_version 
; + bool opt_verbose; + bool opt_linenumbers; + bool opt_style; + bool opt_batch_mode; + bool opt_fragment; + bool opt_attach_line_anchors; + bool opt_show_themes; + bool opt_show_langdefs; + bool opt_asformat_output; + bool opt_printindex; + bool opt_quiet; + bool opt_xslfo_fop; + bool opt_replacequotes; + bool opt_print_progress; + bool opt_fill_zeroes; + bool opt_stylepath_explicit; + bool opt_force_output; + + bool configFileRead; + + string helpLang, charset; + string configFilePath; + + // list of all input file names + vector <string> inputFileNames; + + /** load highlight configuration file */ + void loadConfigurationFile(); + + /** \return file suffix */ + string getFileSuffix( const string & fileName) const; + + /** \return directory name of path */ + string getDirName( const string & path); + + /** get all entries in the directory defined by wildcard */ + void readDirectory(const string & wildcard); + + /** \return Boolean value of paramVal */ + bool getFlag(const string& paramVal); + + /** \return Valid path name */ + string validateDirPath(const string & path); + + void printDeprecatedWarning(const char *oldOption, const char *newOption); + }; + +#endif +/*************************************************************************** + codeparser.cpp - description + ------------------- + begin : Die Jul 9 2002 + copyright : (C) 2002 by André Simon + email : andre.simon1@gmx.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
* + * * + ***************************************************************************/ + +#include "codegenerator.h" + +#include "htmlgenerator.h" +#include "xhtmlgenerator.h" +#include "rtfgenerator.h" +#include "latexgenerator.h" +#include "texgenerator.h" +#include "xslfogenerator.h" +#include "xmlgenerator.h" +#ifndef __WXMSW__ + #include "ansigenerator.h" +#endif + + +using namespace std; + +namespace highlight { + +CodeGenerator* CodeGenerator::generator=NULL; + +CodeGenerator* CodeGenerator::getInstance(OutputType type, + const string& styleInfoPath, + const string& styleInPath, + const string& styleOutPath, + const string& encoding, + bool includeStyle, + bool attachAnchors, + bool replaceQuotes, + bool fopCompatible, + int numSpaces, + WrapMode lineWrappingMode, + bool ln, + bool lnz, + bool fragment, + bool omitEncoding + ) { + if (generator==NULL){ + switch (type){ + case TEX: + generator = new TexGenerator (styleInfoPath); + break; + case LATEX: + generator = new LatexGenerator(styleInfoPath, replaceQuotes); + break; + case RTF: + generator = new RtfGenerator (styleInfoPath); + break; + case XSLFO: + generator = new XslFoGenerator(styleInfoPath, encoding, omitEncoding, + fopCompatible); + break; + case XML: + generator = new XmlGenerator(styleInfoPath,encoding, omitEncoding); + break; + case XHTML: + generator = new XHtmlGenerator(styleInfoPath, encoding, omitEncoding, + attachAnchors); + break; + #ifndef __WXMSW__ + case ANSI: + generator = new AnsiGenerator (styleInfoPath); + break; + #endif + default: + generator = new HtmlGenerator(styleInfoPath, encoding, omitEncoding, + attachAnchors); + } + } + generator->setType(type); + generator->setStyleInputPath(styleInPath); + generator->setStyleOutputPath(styleOutPath); + generator->setIncludeStyle(includeStyle); + generator->setPrintLineNumbers(ln); + generator->setPrintZeroes(lnz); + generator->setFragmentCode(fragment); + generator->setPreformatting(lineWrappingMode, + (generator->getPrintLineNumbers()) 
? + MAX_LINE__WIDTH - LINE_NUMBER_WIDTH : MAX_LINE__WIDTH, + numSpaces ); + return generator; +} + +void CodeGenerator::deleteInstance(){ + delete generator; + generator=NULL; +} + + +CodeGenerator::CodeGenerator(): + in(NULL), + out(NULL), + maskWs(false), + excludeWs(false), + fragmentOutput(false), + showLineNumbers (false), + lineNumberFillZeroes(false), + lineNumber(0), + includeStyleDef(false), + lineIndex(0), + formatter(NULL), + preFormatter(NULL), + formattingEnabled(false), + formattingPossible(false), + outputType(highlight::HTML) +{} + +CodeGenerator::CodeGenerator(const string &colourTheme) + :in(NULL), + out(NULL), + maskWs(false), + excludeWs(false), + fragmentOutput(false), + showLineNumbers (false), + lineNumberFillZeroes(false), + lineNumber(0), + includeStyleDef(false), + stylePath(colourTheme), + lineIndex(0), + formatter(NULL), + preFormatter(NULL), + formattingEnabled(false), + formattingPossible(false), + outputType(highlight::HTML) +{ + line.reserve(100); + docStyle.load(stylePath); +} + +CodeGenerator::~CodeGenerator() +{ + delete preFormatter; + delete formatter; +} + + +/** Getter and Setter*/ + +void CodeGenerator::setPrintLineNumbers(bool flag){ + showLineNumbers=flag; +} + +bool CodeGenerator::getPrintLineNumbers(){ + return showLineNumbers; +} + +void CodeGenerator::setPrintZeroes(bool flag){ + lineNumberFillZeroes=flag; +} + +bool CodeGenerator::getPrintZeroes(){ + return lineNumberFillZeroes; +} + +void CodeGenerator::setFragmentCode(bool flag){ + fragmentOutput=flag; +} + +void CodeGenerator::setIncludeStyle(bool flag){ + includeStyleDef = flag; +} + +void CodeGenerator::setStyleInputPath(const string& path){ + styleInputPath = path; +} +void CodeGenerator::setStyleOutputPath(const string& path){ + styleOutputPath = path; +} + +const string& CodeGenerator::getStyleInputPath(){ + return styleInputPath; +} +const string& CodeGenerator::getStyleOutputPath(){ + return styleOutputPath; +} + + +bool CodeGenerator::getFragmentCode(){ + 
return fragmentOutput; +} + +void CodeGenerator::setStyleName(const string& s){ + stylePath=s; +} + +void CodeGenerator::setType(OutputType t){ + outputType = t; +} + +const string& CodeGenerator::getStyleName(){ + return stylePath; +} + +bool CodeGenerator::formattingDisabled(){ + return !formattingEnabled; +} + +bool CodeGenerator::formattingIsPossible(){ + return formattingPossible; +} + +void CodeGenerator::setPreformatting(WrapMode lineWrappingStyle, + unsigned int lineLength, + int numberSpaces ){ + bool enableWrap = lineWrappingStyle!=WRAP_DISABLED; + bool replaceTabs = numberSpaces > 0; + if (enableWrap || replaceTabs) { + preFormatter=new PreFormatter(enableWrap, replaceTabs); + if (enableWrap) + preFormatter->setWrappingProperties(lineLength, lineWrappingStyle==WRAP_DEFAULT); + if (replaceTabs) + preFormatter->setNumberSpaces(numberSpaces); + } +} + +/* +WrapMode CodeGenerator::getLineWrapping(){ + if (preFormatter==NULL) return WRAP_DISABLED; + return (preFormatter->indentCode()?WRAP_DEFAULT:WRAP_SIMPLE); +} +*/ +LanguageDefinition &CodeGenerator::getLanguage(){ + return langInfo; +} + +void CodeGenerator::reset() +{ + lineIndex = lineNumber = 0; + line.clear(); +} + + +/** sucht vorwaerts ab Position searchPos Ziffer in s und liefert Integerwert +der gefundenen Zahl zurueck. +Im SymbolString stehen die den einzelnen Symbolen zugeordneten Konstanten +immer HINTER diesen Symbolen*/ +State CodeGenerator::getState(const string &s, unsigned int searchPos) +{ + unsigned int i= searchPos+1, result=0; + + // nach Ziffer in s suchen + do { + ++i; + } while ((i<s.length()) && !isdigit(s[i])) ; + + // Zahl zusammensetzen + while ((i<s.length()) && isdigit(s[i])){ + result = result *10 + (s[i]-'0'); + ++i; + } + return ((result)? 
(State)result:_UNKNOWN); +} + +string CodeGenerator::getIdentifier() +{ + --lineIndex; + unsigned int startPos=lineIndex; + char c= line[lineIndex]; + + while ( ( lineIndex < line.length() + && ( StringTools::isAlpha(c) + || isdigit(c)) + || isAllowedChar(c)) + ) + { + ++lineIndex; + c= line[lineIndex]; + } + return string(line, startPos, lineIndex - startPos); +} + +string CodeGenerator::getNumber() +{ + --lineIndex; + unsigned int startPos=lineIndex; + char c=line[lineIndex]; + + while ( lineIndex < line.length() && ( + isdigit(c) + // don't highlight methods applied on numbers as part of the number + // i.e. Ruby: 3.xxx() + || (c == '.' && isdigit(line[lineIndex+1])) + // '-' is accepted as first character + || (c == '-' && lineIndex == startPos) + || (StringTools::isAlpha(c) && line[lineIndex-1]=='0') + || (isxdigit(c) || c=='L' || c=='U' || c=='l' || c=='u') )) + { + ++lineIndex; + c= line[lineIndex]; + } + return string(line,startPos, lineIndex-startPos); +} + +unsigned int CodeGenerator::getLineNumber() +{ + return lineNumber; +} + +bool CodeGenerator::readNewLine(string &newLine){ + bool eof; + terminatingChar=newLine[lineIndex-1]; + if (formattingPossible && formattingEnabled) + { + eof=!formatter->hasMoreLines(); + if (!eof) + { + newLine = formatter->nextLine(); + } + } + else // reformatting not enabled + { + eof = ! 
getline( *in, newLine); + } + return eof; +} + +unsigned char CodeGenerator::getInputChar() +{ + bool eol = lineIndex == line.length(); + + if (eol) { + bool eof=false; + if (preFormatter!=NULL){ + if (!preFormatter->hasMoreLines()) { + eof=readNewLine(line); + preFormatter->setLine(line); + } + line = preFormatter->getNextLine(); + } else { + eof=readNewLine(line); + } + lineIndex=0; + ++lineNumber; + line=StringTools::trimRight(line); + return (eof)?'\0':'\n'; + } + return line[lineIndex++]; +} + +State CodeGenerator::getCurrentState (bool lastStateWasNumber) +{ + unsigned char c; + + if (token.length()==0) { + c=getInputChar(); + } else { + lineIndex-= (token.length()-1); + c=token[0]; + } + if (c=='\n'){ + return _EOL; // End of line + } + + if (c=='\0') { + return _EOF; // End of file + } + + if (isspace(c)) { + token= c; + return _WS; + } + + // numbers have to be searched before using the symbolstring, + // as numbers are part of this string + if (isdigit(c) + // recognize floats like .5 + || (c=='.' && isdigit(line[lineIndex])) + // test if '-' belongs to a term like "1-2" + || ((c == '-') + && (!lastStateWasNumber) + && isdigit(StringTools::getNextNonWs(line, lineIndex))) ) + { + token = getNumber(); + return NUMBER; + } + unsigned int symbolLength; + size_t symbolPos; + bool found=false; + string symbols=langInfo.getSymbolString(); + + symbolPos = symbols.find(c); + // search symbols (comment delimiters, directives etc.) 
+ // before keywords, because alphabetic chars may be part of symbols, too + while ((symbolPos!= string::npos) && (!found)) + { + symbolLength=symbols.find(' ', symbolPos)-symbolPos; + token = symbols.substr(symbolPos, symbolLength); + + // TODO Ruby =ende, =end bugfix (whitespace after symbol needs to be checked) + + // Abfrage nach Leerzeichen in SymbolString verhindert falsches + // Erkennen von Symbolteilen: + if (lineIndex && token == line.substr(lineIndex-1, symbolLength) + && isspace(symbols[symbolPos-1]) ) { + found = true; + lineIndex += (symbolLength-1); + } else { + symbolPos = symbols.find_first_not_of(' ',symbols.find(' ',symbolPos)); + } + } + + // dirty workaround stuff in here + if (found) { + State foundState = getState(symbols, symbolPos); + + // get the current keyword class id to apply the corresponding formatting style + if (foundState==KEYWORD_BEGIN || foundState==TAG_BEGIN ) { + currentKeywordClass=langInfo.getDelimPrefixClassID(token); + } + + // Full line quotes must start in coloumn 1 (Fortran 77) + if (langInfo.isFullLineComment() && foundState==SL_COMMENT){ + if (lineIndex==1) { + return SL_COMMENT; + } + } + // VHDL Workaround: distinguish string delimiters and event markers + // (same eymbol: ') + else if (langInfo.isVHDL() + && foundState==STRING && currentState!=STRING + && lineIndex > 1 + &&(isdigit(line[lineIndex-2]) || isalpha(line[lineIndex-2]))){ + c=line[lineIndex-1]; + // do not return, continue search... + } else { + return foundState; + } + } + + // Alphanumerisches Token parsen und als Keyword oder Type erkennen + if (StringTools::isAlpha(c) || langInfo.isPrefix(c) || isAllowedChar(c)) + { + if (langInfo.isPrefix(c)){ + token = c; + ++lineIndex; + token += getIdentifier(); + } else { + token = getIdentifier(); + } + string reservedWord=(langInfo.isIgnoreCase()) ? + StringTools::lowerCase(token):token; + currentKeywordClass=langInfo.isKeyword(reservedWord); + return (currentKeywordClass) ? 
KEYWORD : STANDARD; + } + + // Character not referring to any state + token = c; + return STANDARD; +} + +string CodeGenerator::maskString(const string & s) +{ + ostringstream ss; + for (unsigned int i=0;i< s.length();i++){ + ss << maskCharacter(s[i]); + } + return ss.str(); +} + +void CodeGenerator::printMaskedToken(bool flushWhiteSpace) +{ + if(flushWhiteSpace) flushWs(); + *out << maskString(token); + token.clear(); +} + +bool CodeGenerator::isAllowedChar(char c) +{ + return ( langInfo.getAllowedChars().find(c)!= string::npos); +} + +bool CodeGenerator::styleFound(){ + return docStyle.found(); +} + +bool CodeGenerator::printIndexFile(const vector<string> &fileList, + const string &outPath){ + return true; +} + +bool CodeGenerator::initIndentationScheme(const string &schemePath){ + + if (formatter!=NULL){ + return true; + } + + ConfigurationReader indentScheme(schemePath); + if (indentScheme.found()){ + if (formatter==NULL) { + formatter=new astyle::ASFormatter(); + + string brackets=indentScheme.getParameter("brackets"); + if (!brackets.empty()){ + // Break brackets from pre-block code (i.e. ANSI C/C++ style). + if (brackets=="break"){ + formatter->setBracketFormatMode(astyle::BREAK_MODE); + } + //Attach brackets to pre-block code (i.e. Java/K&R style). + else if (brackets=="attach"){ + formatter->setBracketFormatMode(astyle::ATTACH_MODE); + } + // Break definition-block brackets and attach command-block brackets. + else if (brackets=="linux"){ + formatter->setBracketFormatMode(astyle::BDAC_MODE); + } + // Break brackets before closing headers (e.g. 'else', 'catch', ..). + // Should be appended to --brackets=attach or --brackets=linux. + else if (brackets=="break-closing-headers"){ + formatter->setBreakClosingHeaderBracketsMode(true); + } + } + + string pad=indentScheme.getParameter("pad"); + if (!pad.empty()){ + //Insert space paddings around parenthesies only. 
+ if (pad=="paren"){ + formatter->setParenthesisPaddingMode(true); + } + // Insert space paddings around operators only. + else if (pad=="oper"){ + formatter->setOperatorPaddingMode(true); + } + //Insert space paddings around operators AND parenthesies. + else if (pad=="all"){ + formatter->setOperatorPaddingMode(true); + formatter->setParenthesisPaddingMode(true); + } + } + + string oneLine=indentScheme.getParameter("one-line"); + if (!oneLine.empty()){ + // Don't break one-line blocks. + if (oneLine=="keep-blocks"){ + formatter->setBreakOneLineBlocksMode(false); + } + // Don't break complex statements and multiple statements residing in a + // single line. + else if (oneLine=="keep-statements"){ + formatter->setSingleStatementsMode(false); + } + } + + // Insert empty lines around unrelated blocks, labels, classes, ... + string breakBlocks=indentScheme.getParameter("break-blocks"); + if (!breakBlocks.empty()){ + if (breakBlocks=="all"){ + //Like --break-blocks, except also insert empty lines around closing + //headers (e.g. 'else', 'catch', ...). + formatter->setBreakClosingHeaderBlocksMode(true); + } + formatter->setBreakBlocksMode(true); + } + string trueVal="true"; + + // Other options... + + //Indent using # spaces per indent. Not specifying # will result in a + //default of 4 spaces per indent. + string indentSpaces=indentScheme.getParameter("indent-spaces"); + + // Indent a minimal # spaces in a continuous conditional belonging to a + //conditional header. + string minConditionalIndent=indentScheme.getParameter("min-conditional-indent"); + + // Indent a maximal # spaces in a continuous statement, relatively to the + // previous line. + string maxInStatementIndent=indentScheme.getParameter("max-instatement-indent"); + + // Add extra indentation to '{' and '}' block brackets. + string indentBrackets=indentScheme.getParameter("indent-brackets"); + + // Add extra indentation entire blocks (including brackets). 
+ string indentBlocks=indentScheme.getParameter("indent-blocks"); + + // Indent the contents of namespace blocks. + string indentNamespaces=indentScheme.getParameter("indent-namespaces"); + + // Indent 'class' blocks, so that the inner 'public:','protected:' and + // 'private: headers are indented inrelation to the class block. + string indentClasses=indentScheme.getParameter("indent-classes"); + + // Indent 'switch' blocks, so that the inner 'case XXX:' headers are + // indented in relation to the switch block. + string indentSwitches=indentScheme.getParameter("indent-switches"); + + // Indent 'case XXX:' lines, so that they are flush with their bodies.. + string indentCases=indentScheme.getParameter("indent-cases"); + + // Indent labels so that they appear one indent less than the current + // indentation level, rather than being flushed completely to the left + // (which is the default). + string indentLabels=indentScheme.getParameter("indent-labels"); + + // Indent multi-line #define statements + string indentPreprocessor=indentScheme.getParameter("indent-preprocessor"); + + // Break 'else if()' statements into two different lines. 
+ string breakElseIfs = indentScheme.getParameter("break-elseifs"); + + string javaStyle = indentScheme.getParameter("java-style"); + + // default values in ASBeautifier are false, it is ok to set them false + // if parameter does not exist in scheme file + formatter->setBracketIndent(indentBrackets==trueVal); + formatter->setBlockIndent(indentBlocks==trueVal); + formatter->setNamespaceIndent(indentNamespaces==trueVal); + formatter->setClassIndent(indentClasses==trueVal); + formatter->setSwitchIndent(indentSwitches==trueVal); + formatter->setCaseIndent(indentCases==trueVal); + formatter->setLabelIndent(indentLabels==trueVal); + formatter->setPreprocessorIndent(indentPreprocessor==trueVal); + formatter->setBreakElseIfsMode(breakElseIfs==trueVal); + + if (javaStyle==trueVal){ + formatter->setJavaStyle(); + } + + if (!indentSpaces.empty()){ + formatter->setSpaceIndentation(StringTools::str2int(indentSpaces)); + } + if (!minConditionalIndent.empty()){ + formatter->setMinConditionalIndentLength( + StringTools::str2int(minConditionalIndent)); + } + if (!maxInStatementIndent.empty()){ + formatter->setMinConditionalIndentLength( + StringTools::str2int(maxInStatementIndent)); + } + } + formattingEnabled=(formatter != NULL); + return true; + } else { + return false; + } +} + +LoadResult CodeGenerator::initLanguage(const string& langDefPath){ + bool reloadNecessary= langInfo.needsReload(langDefPath); + if (reloadNecessary){ + bool failure = !langInfo.load(langDefPath); + + if (failure) { + return LOAD_FAILED; + } + + formattingPossible=langInfo.enableReformatting(); + + if (styleTagOpen.size()>NUMBER_BUILTIN_STYLES){ + // remove dynamic keyword tag delimiters of the old language definition + vector<string>::iterator keyStyleOpenBegin = + styleTagOpen.begin() + NUMBER_BUILTIN_STYLES; + vector<string>::iterator keyStyleCloseBegin = + styleTagClose.begin()+ NUMBER_BUILTIN_STYLES; + styleTagOpen.erase(keyStyleOpenBegin, styleTagOpen.end()); + 
styleTagClose.erase(keyStyleCloseBegin, styleTagClose.end()); + } + // add new keyword delimiters + for (unsigned int i=0;i< langInfo.getKeywordClasses().size(); i++){ + styleTagOpen.push_back(getMatchingOpenTag(i)); + styleTagClose.push_back(getMatchingCloseTag(i)); + } + } + return (reloadNecessary) ? LOAD_NEW : LOAD_NONE; +} + +ParseError CodeGenerator::printOutput (const string & inFileName, + const string &outFileName) +{ + if (!docStyle.found()) { + return BAD_STYLE; + } + reset(); + + ParseError error=PARSE_OK; + + in = (inFileName.empty()? &cin :new ifstream (inFileName.c_str())); + if (!in->fail()) { + out = (outFileName.empty()? &cout :new ofstream (outFileName.c_str())); + if ( out->fail()) { + error=BAD_OUTPUT; + } + } + + if ( in->fail()){ + error=BAD_INPUT; + } + + if (error==PARSE_OK) { + if (formatter != NULL){ + formatter->init(new astyle::ASStreamIterator(in)); + } + if (! fragmentOutput){ + *out << getHeader(inFileName); + } + printBody(); + if (! fragmentOutput){ + *out << getFooter(); + } + } + + if (!outFileName.empty()){ + delete out; out=NULL; + } + if (!inFileName.empty()) { + delete in; in=NULL; + } + return error; +} + + +unsigned int CodeGenerator::getStyleID(State s, unsigned int kwClassID){ + if (s==KEYWORD && kwClassID){ + return NUMBER_BUILTIN_STYLES + kwClassID-1; + } + return (unsigned int) s ; +} + +void CodeGenerator::closeTag(State s){ + *out << styleTagClose[(unsigned int)s]; + flushWs(); + currentState=_UNKNOWN; +} + +void CodeGenerator::openTag(State s){ + *out << styleTagOpen[(unsigned int)s]; + currentState=s; +} + +void CodeGenerator::closeKWTag(unsigned int kwClassID){ + *out << styleTagClose[getStyleID(KEYWORD, kwClassID)]; + + flushWs(); + currentState=_UNKNOWN; +} + +void CodeGenerator::openKWTag(unsigned int kwClassID){ + *out << styleTagOpen[getStyleID(KEYWORD, kwClassID)]; + currentState=KEYWORD; +} + + +/////////////////////////////////////////////////////////////////////////////// + +void 
CodeGenerator::processRootState() +{ + if (langInfo.highlightingDisabled()){ + string line; + while (getline(*in, line)){ + *out << maskString(line) << getNewLine(); + } + *out << flush; + return; + } + + State state=STANDARD; + + bool eof=false, + firstLine=true; // avoid newline before printing the first output line + openTag(STANDARD); + do { + // determine next state + state= getCurrentState(state==NUMBER); + // handle current state + switch(state) + { + case KEYWORD: + case KEYWORD_BEGIN: + closeTag(STANDARD); + eof=processKeywordState(state); + openTag(STANDARD); + break; + case NUMBER: + closeTag(STANDARD); + eof=processNumberState(); + openTag(STANDARD); + break; + case ML_COMMENT_BEGIN: + closeTag(STANDARD); + eof=processMultiLineCommentState(); + openTag(STANDARD); + break; + case SL_COMMENT: + closeTag(STANDARD); + eof=processSingleLineCommentState(); + openTag(STANDARD); + break; + case STRING: + closeTag(STANDARD); + eof=processStringState(STANDARD); + openTag(STANDARD); + break; + case DIRECTIVE_LINE: + closeTag(STANDARD); + eof=processDirectiveState(); + openTag(STANDARD); + break; + case TAG_BEGIN: + closeTag(STANDARD); + eof=processTagState(); + openTag(STANDARD); + break; + case ESC_CHAR: + if (langInfo.allowExtEscSeq()){ + closeTag(STANDARD); + eof=processEscapeCharState(); + openTag(STANDARD); + } else { + printMaskedToken(); + } + break; + case SYMBOL: + closeTag(STANDARD); + eof=processSymbolState(); + openTag(STANDARD); + break; + case _EOL: + insertLineNumber(!firstLine); + firstLine=false; + break; + case _EOF: + eof=true; + break; + case _WS: + processWsState(); + break; + default: + printMaskedToken(); + break; + } + } + while (!eof); + closeTag(STANDARD); + *out << getNewLine(); + *out << flush; +} + +bool CodeGenerator::processKeywordState(State myState){ + State newState=STANDARD; + unsigned int myClassID=currentKeywordClass; + bool eof=false, + exitState=false; + + openKWTag(myClassID); + do { + printMaskedToken(newState!=_WS); + 
newState= getCurrentState(); + switch(newState) + { + case _WS: + processWsState(); + break; + case _EOL: + insertLineNumber(); + exitState=true; + break; + case _EOF: + eof = true; + break; + case KEYWORD_END: + if (myState==KEYWORD_BEGIN){ + printMaskedToken(); + } + exitState=true; + break; + default: + exitState= myState!=KEYWORD_BEGIN + &&((myClassID!=currentKeywordClass)||(myState!=newState)); + break; + } + } while ((!exitState) && (!eof)); + + closeKWTag(myClassID); + + currentKeywordClass=0; + return eof; +} + +bool CodeGenerator::processNumberState(){ + State newState=STANDARD; + bool eof=false, + exitState=false; + + openTag(NUMBER); + do { + printMaskedToken(newState!=_WS); + newState= getCurrentState(true); + switch(newState) + { + case _WS: + processWsState(); + break; + case _EOL: + insertLineNumber(); + exitState=true; + break; + case _EOF: + eof = true; + break; + default: + exitState=newState!=NUMBER; + break; + } + } while ((!exitState) && (!eof)); + + closeTag(NUMBER); + return eof; +} + +bool CodeGenerator::processMultiLineCommentState() +{ + int commentCount=1; + State newState=STANDARD; + bool eof=false, exitState=false; + + openTag(ML_COMMENT_BEGIN); + do { + printMaskedToken(newState!=_WS); + newState= getCurrentState(); + + switch(newState) + { + case _WS: + processWsState(); + break; + case _EOL: + wsBuffer += styleTagClose[ML_COMMENT_BEGIN]; + insertLineNumber(); + wsBuffer += styleTagOpen[ML_COMMENT_BEGIN]; + break; + case _EOF: + eof = true; + break; + case ML_COMMENT_BEGIN: + if (langInfo.allowNestedMLComments()) { + ++commentCount; + } + break; + case ML_COMMENT_END: + commentCount--; + if (!commentCount){ + printMaskedToken(); + exitState=true; + } + break; + default: + break; + } + } while ((!exitState) && (!eof)); + + closeTag(ML_COMMENT_BEGIN); + return eof; +} + +bool CodeGenerator::processSingleLineCommentState() +{ + + //if ( checkSpecialCmd()) return false; + + State newState=STANDARD; + bool eof=false, exitState=false; + + 
openTag(SL_COMMENT); + do { + printMaskedToken(newState!=_WS); + newState= getCurrentState(); + + switch(newState) + { + case _WS: + processWsState(); + break; + case _EOL: + printMaskedToken(); + insertLineNumber(); + exitState=true; + break; + case _EOF: + eof = true; + break; + default: + break; + } + } while ((!exitState) && (!eof)); + + closeTag(SL_COMMENT); + return eof; +} + +bool CodeGenerator::processDirectiveState() +{ + State newState=STANDARD; + bool eof=false, exitState=false; + + openTag(DIRECTIVE_LINE); + do { + printMaskedToken(newState!=_WS); + newState= getCurrentState(); + switch(newState) + { + case _WS: + processWsState(); + break; + case DIRECTIVE_LINE_END: + printMaskedToken(); + exitState=true; + break; + case _EOL: + printMaskedToken(); + exitState=(terminatingChar!=langInfo.getContinuationChar()); + if (!exitState) wsBuffer += styleTagClose[DIRECTIVE_LINE]; + insertLineNumber(); + if (!exitState) wsBuffer += styleTagOpen[DIRECTIVE_LINE]; + break; + case ML_COMMENT_BEGIN: + closeTag(DIRECTIVE_LINE); + eof= processMultiLineCommentState(); + openTag(DIRECTIVE_LINE); + break; + case SL_COMMENT: + closeTag(DIRECTIVE_LINE); + eof= processSingleLineCommentState(); + openTag(DIRECTIVE_LINE); + exitState=true; + break; + case STRING: + closeTag(DIRECTIVE_LINE); + eof=processStringState(DIRECTIVE_LINE); + openTag(DIRECTIVE_LINE); + break; + case _EOF: + eof = true; + break; + default: + break; + } + } while ((!exitState) && (!eof)); + + closeTag(DIRECTIVE_LINE); + return eof; +} + +bool CodeGenerator::processStringState(State oldState) +{ + State newState=STANDARD; + bool eof=false, exitState=false; + bool returnedFromOtherState=false; + // Test if character before string open delimiter token equals to the + // raw string prefix (Example: r" ", r""" """ in Python) + bool isRawString= + line[lineIndex-token.length()-1]==langInfo.getRawStringPrefix(); + + string openStringDelimiter=token; + + State myState= (oldState==DIRECTIVE_LINE) ? 
DIRECTIVE_STRING : STRING; + openTag(myState); + do { + // true if last token was an escape char + if (!returnedFromOtherState) { + printMaskedToken(newState!=_WS); + } + returnedFromOtherState=false; + newState= getCurrentState(); + + switch(newState) + { + case _WS: + processWsState(); + break; + case _EOL: + wsBuffer += styleTagClose[myState]; + insertLineNumber(); + wsBuffer += styleTagOpen[myState]; + //exitState=true; + break; + case ML_COMMENT_END: + printMaskedToken(); + break; + case STRING: + exitState= openStringDelimiter==token; + printMaskedToken(); + break; + case ESC_CHAR: + if (!isRawString){ + closeTag(myState); + eof=processEscapeCharState(); + openTag(myState); + returnedFromOtherState=true; + } + break; + case _EOF: + eof = true; + break; + default: + printMaskedToken(); + break; + } + } while ((!exitState) && (!eof)); + + closeTag(myState); + return eof; +} + +bool CodeGenerator::processTagState() +{ + State newState=STANDARD; + bool eof=false, exitState=false, returnedFromOtherState=false; + unsigned int myKeywordClass=currentKeywordClass; + + openTag(KEYWORD); + do { + if (!returnedFromOtherState) { + printMaskedToken(newState!=_WS); + } + returnedFromOtherState = false; + newState= getCurrentState(); + + switch(newState) + { + case _WS: + processWsState(); + break; + case _EOL: + insertLineNumber(); + exitState=true; + break; + case TAG_END: + printMaskedToken(); + exitState=true; + break; + case STRING: + closeTag(KEYWORD); + eof=processStringState(KEYWORD); + currentKeywordClass=myKeywordClass; + openTag(KEYWORD); + returnedFromOtherState = true; + break; + case ESC_CHAR: + closeTag(KEYWORD); + eof=processEscapeCharState(); + currentKeywordClass=myKeywordClass; + openTag(KEYWORD); + returnedFromOtherState = true; + break; + case NUMBER: + closeTag(KEYWORD); + eof=processNumberState(); + currentKeywordClass=myKeywordClass; + openTag(KEYWORD); + returnedFromOtherState = true; + break; + case _EOF: + eof = true; + break; + default: + 
printMaskedToken(); + break; + } + } while ((!exitState) && (!eof)); + + closeTag(KEYWORD); + currentKeywordClass=0; + + return eof; +} + +bool CodeGenerator::processSymbolState(){ + + State newState=STANDARD; + bool eof=false, + exitState=false; + + openTag(SYMBOL); + do { + printMaskedToken(newState!=_WS); + newState= getCurrentState(true); + switch(newState) + { + case _WS: + processWsState(); + break; + case _EOL: + insertLineNumber(); + exitState=true; + break; + case _EOF: + eof = true; + break; + default: + exitState=newState!=SYMBOL; + break; + } + } while ((!exitState) && (!eof)); + + closeTag(SYMBOL); + return eof; +} + +bool CodeGenerator::processEscapeCharState() +{ + State newState=STANDARD; + bool eof=false, exitState=false; + + openTag(ESC_CHAR); + do { + printMaskedToken(newState!=_WS); + skipEscapeSequence(); + newState= getCurrentState(); + switch(newState) + { + case _EOL: + insertLineNumber(); + exitState=true; + break; + case _WS: + processWsState(); + --lineIndex; + break; + case _EOF: + eof = true; + break; + default: + exitState=newState!=ESC_CHAR; + break; + } + } while ((!exitState) && (!eof)); + + closeTag(ESC_CHAR); + return eof; +} + +void CodeGenerator::skipEscapeSequence(){ + if (lineIndex<line.length()){ + char c=line[lineIndex]; + int charsToSkip=1; + // Escape Sequenz /ooo Oktal, /x000 hex, /u00xx Java unicode + if (isdigit(c) ){ + // \0 abfangen + while ( isdigit(line[lineIndex+charsToSkip]) && charsToSkip<4) { + ++charsToSkip; + } + } else if (tolower(c)=='x'){ + charsToSkip=langInfo.isJava() ? 
4 : 3; + } else if (tolower(c)=='u'){ + charsToSkip=5; + } + while (charsToSkip-- && lineIndex++<line.length()){ + *out <<maskCharacter(line[lineIndex-1]); + } + } +} + + +void CodeGenerator::processWsState() +{ + if (!maskWs) { + wsBuffer += token; + token.clear(); + return; + } + flushWs(); + int cntWs=0; + lineIndex--; + + while (isspace(line[lineIndex]) ) { + ++cntWs; + ++lineIndex; + } + + if (cntWs>1) { + unsigned int styleID=getStyleID(currentState, currentKeywordClass); + if (excludeWs && styleID!=_UNKNOWN) { + *out << styleTagClose[styleID]; + } + *out << maskWsBegin; + for (int i=0; i<cntWs; i++){ + *out << spacer; + } + *out << maskWsEnd; + if (excludeWs && styleID!=_UNKNOWN){ + *out << styleTagOpen[styleID]; + } + } else { + *out << token; + } + token.clear(); +} + +void CodeGenerator::flushWs(){ + *out<<wsBuffer; + wsBuffer.clear(); +} + +bool CodeGenerator::isFirstNonWsChar() { + unsigned int i=lineIndex-1; + while (i--){ + if (!isspace(line[i])){ + return false; + } + } + return true; +} + +string CodeGenerator::getNewLine(){ + return newLineTag; +} + +void CodeGenerator::insertLineNumber(bool insertNewLine) { + if (insertNewLine){ + wsBuffer += getNewLine(); + } + if (showLineNumbers) { + ostringstream os; + ostringstream numberPrefix; + if (lineNumberFillZeroes) { + os.fill('0'); + } + os <<setw(LINE_NUMBER_WIDTH) << right << lineNumber; + + numberPrefix << styleTagOpen[LINENUMBER] + << maskString(os.str()) << spacer + << styleTagClose[LINENUMBER]; + + wsBuffer += numberPrefix.str(); + } +} + +unsigned int CodeGenerator::getLineIndex(){ + return lineIndex; +} + +bool CodeGenerator::printExternalStyle(const string &outFile) +{ + if (!includeStyleDef && langInfo.getSyntaxHighlight()) { + ofstream cssOutFile(outFile.c_str()); + if (cssOutFile) { + cssOutFile << styleCommentOpen + <<" Style definition file generated by highlight " + << HIGHLIGHT_VERSION << ", " << HIGHLIGHT_URL + << " " << styleCommentClose << "\n"; + cssOutFile << 
"\n"<<styleCommentOpen<<" Highlighting theme definition: " + << styleCommentClose << "\n\n" + << getStyleDefinition() + << "\n"; + cssOutFile << readUserStyleDef(); + cssOutFile.close(); + } else { + return false; + } + } + return true; +} + +string CodeGenerator::readUserStyleDef(){ + ostringstream ostr; + if (!styleInputPath.empty()){ + ifstream userStyleDef(styleInputPath.c_str()); + if (userStyleDef) { + ostr << "\n"<<styleCommentOpen<<" Content of "<<styleInputPath<<": "<<styleCommentClose<<"\n"; + string line; + while (getline(userStyleDef, line)){ + ostr << line << "\n"; + } + userStyleDef.close(); + } else { + ostr << styleCommentOpen<<" ERROR: Could not include " + << styleInputPath + << "."<<styleCommentClose<<"\n"; + } + } + return ostr.str(); +} + +bool CodeGenerator::checkSpecialCmd(){ + bool insertNL = (lineIndex-token.length()); + cerr << "token: "<<token<< " index"<< lineIndex << " "<<line [ lineIndex ]<<endl; + + if (line [ lineIndex ]=='!'){ + // find cmd + size_t cmdPos1 = line.find('@', lineIndex+1); + + cerr << "cmdPos"<<cmdPos1<<endl; + if(cmdPos1==string::npos) return false; + size_t cmdPos2=cmdPos1+1; + while (cmdPos2 < line.length() && StringTools::isAlpha(line[cmdPos2])) cmdPos2++; + cerr << "cmdPos2"<<cmdPos2<<endl; + cerr << line.substr(cmdPos1, cmdPos2)<<endl; + + // hide comment line + token.clear(); + lineIndex=line.length(); + getInputChar(); lineNumber--; + if (insertNL) { lineNumber++;insertLineNumber();}; + // end hide + + return true; + } + + return false; +} + +} +/*************************************************************************** + codeparser.h - description + ------------------- + begin : Die Jul 9 2002 + copyright : (C) 2002 by Andre Simon + email : andre.simon1@gmx.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under 
the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#ifndef CODEPARSER_H +#define CODEPARSER_H + +#include <iostream> +#include <sstream> +#include <string> +#include <iomanip> +#include <cctype> + +#include "languagedefinition.h" +#include "documentstyle.h" +#include "ASFormatter.h" +#include "preformatter.h" +#include "enums.h" + + +#define NUMBER_BUILTIN_STYLES 10 +#define LINE_NUMBER_WIDTH 5 +#define MAX_LINE__WIDTH 80 + +#define OUTPUT_FLAG_LN 1 +#define OUTPUT_FLAG_LNZ 2 +#define OUTPUT_FLAG_FRAG 4 + +/** The highlight namespace contains all classes and data structures + needed for parsing input data. +*/ +namespace highlight { + +/** \brief Base class for parsing. Works similar to a Turing machine. + + The virtual class provides source code parsing functioality, based on + information stored in language definitions.<br> + Deriving classes have to define the output format.<br> + Codegenerator is a singleton class. 
+ +* @author Andre Simon +*/ + +class CodeGenerator + { + + public: + + virtual ~CodeGenerator(); + + /** + Get appropriate Codegenerator instance + \param type Output file type (HTML, XHTML, RTF, LATEX, TEX, XSLFO, ANSI) + \param styleInfoPath Path to formatting style information + \param styleInPath Path to style definition input file (to be included in styleOutPath) + \param styleOutPath Path to style definition output file (CSS path for HTML output) + \param encoding Output file encoding name + \param includeStyle Switch to include style information in output file (only XHTML, HTML) + \param attachAnchors Switch to attach anchors to line numbers (only XHTML, HTML) + \param replaceQuotes Switch to replace quotes by \dq{} (only LATEX) + \param fopCompatible Switch to generate FO for Apache FOP (only XSLFO) + \param omitEncoding Switch to omit encoding info in output document + \param ln Set true if line numbers should be printed + \param lnz Set true if leading space of line numbers should be filled with 0's + \param fragment Set true if document header and footer should be omitted + \param numSpaces Number of spaces which replace a tab + \param lineWrappingMode Line wrapping mode + */ + static CodeGenerator* getInstance(OutputType type, + const string& styleInfoPath, + const string& styleInPath, + const string& styleOutPath, + const string& encoding, + bool includeStyle, + bool attachAnchors, + bool replaceQuotes, + bool fopCompatible, + int numSpaces, + WrapMode lineWrappingMode, + bool ln, + bool lnz, + bool fragment, + bool omitEncoding ); + + /** Deletes the singleton CodeGenerator instance. 
+ Call this method if getInstance was already called, or if you want to + free the momory after usage.*/ + static void deleteInstance(); + + /** + Generates output + \param inFileName Path of input file (if empty use stdin) + \param outFileName Path of input file (if empty use stdout) + + \return ParseError + */ + ParseError printOutput(const string &inFileName, const string &outFileName); + + /** \return True if document style was found */ + bool styleFound(); + + /** \return True if reformatting of current input is disabled */ + bool formattingDisabled(); + + /** \return True if reformatting of current input is possible */ + bool formattingIsPossible(); + + /** \param langDefPath Absolute path to language definition + \return Failure: LOAD_FAILED; Reload necessary: LOAD_NEW, + no reload necessary: LOAD_NONE */ + LoadResult initLanguage(const string& langDefPath); + + /** \return Language definition*/ + LanguageDefinition &getLanguage(); + + /** tell parser to output line numbers + \param flag true if line numbers should be printed + */ + void setPrintLineNumbers(bool flag); + + /** \return line number flag */ + bool getPrintLineNumbers(); + + + /** tell parser to output line numbers filled with zeroes + \param flag true if zeroes should be printed + */ + void setPrintZeroes(bool flag); + + /** \return print zeroes flag */ + bool getPrintZeroes(); + + /** tell parser to omit document header and footer + \param flag true if output should be fragmented + */ + void setFragmentCode(bool flag); + + /** \return fragment flag */ + bool getFragmentCode(); + + /** tell parser the style name + \param s path to style definition + */ + void setStyleName(const string& s); + + /** \return style path */ + const string& getStyleName(); + + /** tell parser the wrapping mode + \param lineWrappingStyle wrapping style + \param lineLength max line length + \param numberSpaces number of spaces which replace a tab + */ + void setPreformatting(WrapMode lineWrappingStyle, unsigned int 
lineLength,int numberSpaces); + + /** \return wrapping style */ + WrapMode getLineWrapping(); + + /** tell parser the include style definition in output + \param flag true if style should be included + */ + void setIncludeStyle(bool flag); + + /** Print style definitions to external file + \param outFile Path of external style definition + */ + bool printExternalStyle(const string &outFile); + + /** Print index file with all input file names + \param fileList List of output file names + \param outPath Output path + */ + virtual bool printIndexFile(const vector<string> & fileList, + const string &outPath); + + /** initialize source code indentation + \param indentSchemePath Path of indentation scheme + \return true id successfull + */ + bool initIndentationScheme(const string&indentSchemePath); + + /** Set style input path + \param s path to style input file + */ + void setStyleInputPath(const string& path); + + /** Set style output path + \param s path to style output file + */ + void setStyleOutputPath(const string& path); + +/** Set output type + \param s output type + */ + void setType(OutputType t); + + /** + \return style input file path + */ + const string& getStyleInputPath(); + + /** + \return style output file path + */ + const string& getStyleOutputPath(); + +protected: + + CodeGenerator(); + + //! 
CodeGenerator Constructor + /** + \param colourTheme Name of coloring style being used + */ + CodeGenerator(const string &colourTheme); + + /** \param c Character to be masked + \return Escape sequence of output format */ + virtual string maskCharacter(unsigned char c) = 0; + + /** \param s string + \return Copy of s with all escaped characters */ + string maskString(const string &s ) ; + + /** \param s Symbol string + \param searchPos Position where search starts + \return Found state (integer value) */ + State getState(const string &s, unsigned int searchPos); + + /** \return Next identifier in current line of code */ + string getIdentifier(); + + /** \return Next number in current line of code */ + string getNumber(); + + /** Insert line number at the beginning of current output line */ + virtual void insertLineNumber(bool insertNewLine=true); + + /** Prints document footer*/ + virtual string getFooter() = 0; + + /** Prints document body*/ + virtual void printBody() = 0; + + /** prints document header + \param title Title of the document + */ + virtual string getHeader(const string &title) = 0; + + /** Get current line number + \return line number */ + unsigned int getLineNumber(); + + + /** Tag Delimiters for every colour style*/ + vector <string> styleTagOpen, styleTagClose; + + /** Description of document colour style*/ + DocumentStyle docStyle; + + /** Language definition*/ + LanguageDefinition langInfo; + + /** Tag for inserting line feeds*/ + string newLineTag; + + /** String that represents a white space in output */ + string spacer; + + /** file input*/ + istream *in; + + /** file output*/ + ostream *out; + + /** Tags which enclose white space indentation blocks */ + string maskWsBegin, maskWsEnd; + + /** Style comment delimiters */ + string styleCommentOpen, styleCommentClose; + + /** Test if maskWsBegin and maskWsEnd should be applied */ + bool maskWs; + + /** Test if whitespace sould always be separated from enclosing tokens */ + bool excludeWs; + + 
/** Test if header and footer should be omitted */ + bool fragmentOutput; + + /** Test if line numbers should be printed */ + bool showLineNumbers; + + /** Test if leading spyce of line number should be filled with zeroes*/ + bool lineNumberFillZeroes; + + /** Current line of input file*/ + string line; + + /** Current line number */ + unsigned int lineNumber; + + // Zeigt den aktuellen Zustand an + // wird nicht in getCurrentState gesetzt, da nur Zustände interessant + // sind, die als Index auf die styleCloseTags und styleOpenTags verwendet + // werden können + /** Current state*/ + State currentState; + + /** keyword class id, used to apply the corresponding keyword style*/ + unsigned int currentKeywordClass; + + /** Processes origin state */ + void processRootState(); + + /** return line break sequence */ + virtual string getNewLine(); + + /** + \param s current state + \return Index of style tag corresponding to the states + */ + unsigned int getStyleID(State s, unsigned int kwClassID = 0); + + /** \return line index */ + unsigned int getLineIndex(); + + /** print all remaining white space*/ + void flushWs(); + + /** + \return Content of user defined input style + */ + string readUserStyleDef(); + + /** + \return Style definition of the chosen output format + */ + virtual string getStyleDefinition() {return "";}; + + /** contains white space, which will be printed after a closing tag */ + string wsBuffer; + + /** + Flag to test if style definition should be included in output document + */ + bool includeStyleDef; + +private: + + CodeGenerator(const CodeGenerator&){} + CodeGenerator& operator=(CodeGenerator&){ return *this;} + + static CodeGenerator* generator; + + /** return matching open and close tags of the given state */ + virtual string getMatchingOpenTag(unsigned int) = 0; + virtual string getMatchingCloseTag(unsigned int) = 0; + + /** open a new tag, set current state to s*/ + void openTag(State s); + + /** close opened tag, clear current state */ + 
void closeTag(State s); + + void closeTag(unsigned int styleID); + + void openTag(unsigned int styleID); + + // path to style definition file + string stylePath; + + // contains current position in line + unsigned int lineIndex; + + /**last character of the last line*/ + unsigned char terminatingChar; + + /** Class for reformatting */ + astyle::ASFormatter *formatter; + + /** Class for line wrapping and tab replacement*/ + PreFormatter *preFormatter; + + /** Flag to test if formatting is enabled with current input document*/ + bool formattingEnabled; + + + /** Flag to test if formatting is possible with current input document*/ + bool formattingPossible; + + /** contains the current token*/ + string token; + + string styleInputPath, styleOutputPath; + + /** Resets parser to origin state, call this after every file conversion */ + void reset(); + + /** read new line from in stream */ + bool readNewLine(string &newLine); + + /** return next character from in stream */ + unsigned char getInputChar(); + + OutputType outputType; + + /** return new state */ + State getCurrentState ( bool lastStateWasNumber=false); + + /** Methods that represent a parsing state */ + bool processKeywordState(State myState) ; + bool processNumberState() ; + bool processMultiLineCommentState(); + bool processSingleLineCommentState(); + bool processStringState(State oldState); + bool processEscapeCharState(); + bool processDirectiveState(); + bool processTagState(); + bool processSymbolState(); + void processWsState(); + + /** gibt true zurck, falls c ein erlaubter Character innerhalb von Keyword + oder Typbezeichner ist */ + bool isAllowedChar(char c) ; + + /** returns true if curret token is the first in line and no whitespace */ + bool isFirstNonWsChar() ; + + /** print escaped token and clears it */ + void printMaskedToken(bool flushWhiteSpace=true); + + /** print escape sequence */ + void skipEscapeSequence(); + + void closeKWTag(unsigned int styleID); + void openKWTag(unsigned int 
styleID); + + /** look for special commands in comments, take action in derived class + \return true if command was found + */ + bool checkSpecialCmd(); + + }; +} + +#endif + +/* + * Copyright (c) 1998,1999,2000,2001,2002 Tal Davidson. All rights reserved. + * + * compiler_defines.h (1 January 1999) + * by Tal Davidson (davidsont@bigfoot.com) + * This file is a part of "Artistic Style" - an indentater and reformatter + * of C, C++, C# and Java source files. + * + * The "Artistic Style" project, including all files needed to compile it, + * is free software; you can redistribute it and/or use it and/or modify it + * under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, + * or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU General Public + * License along with this program. + */ + + + + + +/* + * comment out the line below if your compiler does NOT understand NAMESPACES + */ +#define USES_NAMESPACE + + +#if defined(__GNUC__) && __GNUC__ < 3 +// for G++ implementation of string.compare: +#define COMPARE(place, length, str) compare((str), (place), (length)) +#else +// for standard implementation of string.compare: +#define COMPARE(place, length, str) compare((place), (length), (str)) +#endif + + +// Fix by John A. McNamara +// Get rid of annoying MSVC warnings on debug builds about lengths of +// identifiers in template instantiations. 
+#ifdef _MSC_VER +#pragma warning( disable:4786 ) +#endif + +/*************************************************************************** + configurationreader.cpp - description + ------------------- + begin : Son Nov 10 2002 + copyright : (C) 2002 by André Simon + email : andre.simon1@gmx.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include "configurationreader.h" + +using namespace std; + +ConfigurationReader::ConfigurationReader(const string & configuration_path) +{ + ifstream in (configuration_path.c_str()); + fileFound=in; + if (fileFound) { + string line; + line.reserve(500); + unsigned int lineBegin; + size_t delimPos; + string paramName, paramValue; + while (getline(in, line)) { + lineBegin=line.find_first_not_of("\t "); + if ((line.size()>2) && (lineBegin!=string::npos) + && (line.at(lineBegin)!='#')) { //comment? + if (line[lineBegin]=='$') { // neuer Parametername? 
+ delimPos=line.find("=",lineBegin)-1; + if (delimPos!=string::npos) { + paramName=StringTools::trimRight( + StringTools::lowerCase(line.substr(lineBegin+1, delimPos))); + parameterNames.push_back(paramName); + paramValue=line.substr(delimPos+2, line.length()); + } + } else { // line belongs to last parameter + paramValue=line; + } + if (parameterMap[paramName].empty()) { + parameterMap[paramName] = paramValue; + } else { + parameterMap[paramName]+= (" "+paramValue); + } + } //if ((lineBegin!=string::npos) && (line.at(lineBegin)!='#')) + } //while + in.close(); + } //if (in) +} + +ConfigurationReader::~ConfigurationReader() +{ +} + +bool ConfigurationReader::found() +{ + return fileFound; +} + +string &ConfigurationReader::getParameter(const string & paramName) +{ + return parameterMap[paramName] ; +} + +const char* ConfigurationReader::getCParameter(const string & paramName) +{ + return parameterMap[paramName].c_str() ; +} + +vector<string> &ConfigurationReader::getParameterNames() +{ + return parameterNames; +} +/*************************************************************************** + configurationreader.h - description + ------------------- + begin : Son Nov 10 2002 + copyright : (C) 2002 by Andr�Simon + email : andre.simon1@gmx.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
* + * * + ***************************************************************************/ + +#ifndef CONFIGURATIONREADER_H +#define CONFIGURATIONREADER_H + +#include <string> +#include <sstream> +#include <map> +#include <iostream> +#include <fstream> +#include <vector> + +#include "stringtools.h" + +using namespace std; + +/** Maps parameter keys to values*/ +typedef map<string, string> ParameterMap; + + +/** \brief Class to handle ASCII config files + + Configuration file format:<br> + $ParamName=ParamValue<br> + ParamValue may be splittet over multiple lines<br> + ParamName is not case sensitive<br> + Comments start with # as the first character of a line + + **/ + +class ConfigurationReader + { + public: + /** Constructor + \param configuration_path Path to configuration file + */ + ConfigurationReader(const string & configuration_path); + ~ConfigurationReader(); + + /** \param paramName Name of parameter + \return Value of parameter */ + string &getParameter(const string & paramName); + + /** \param paramName Name of parameter + \return Value of parameter */ + const char* getCParameter(const string & paramName); + + /** \return True if config file exists */ + bool found(); + + /** \return List of parameter names */ + vector<string> &getParameterNames(); + + private: + ParameterMap parameterMap; + bool fileFound; + vector<string> parameterNames; + }; + +#endif +/*************************************************************************** + dataDir.cpp - description + ------------------- + begin : Sam M� 1 2003 + copyright : (C) 2003 by André Simon + email : andre.simon1@gmx.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at 
your option) any later version. * + * * + ***************************************************************************/ + +#include "datadir.h" + +using namespace std; + +bool DataDir::searchDataDir(const string &userDefinedDir){ +#ifndef _WIN32 + + bool found = false; + //falls kein Datenverzeichnis angegeben, startIndex auf 1 setzen + int searchStartIndex=(userDefinedDir.empty()); + + string possibleDirs[] ={ userDefinedDir, + #ifdef HL_DATA_DIR + HL_DATA_DIR, + #endif + "/usr/share/highlight/" + }; + + for (int i=searchStartIndex;i< + #ifdef HL_DATA_DIR + 3 + #else + 2 + #endif + ;i++) + { + if (fileExists(possibleDirs[i])) + { + dataDir=possibleDirs[i]; + found = true; + } + if (found) { + break; + } + else { + if (!searchStartIndex) + cerr << "highlight: directory " + << userDefinedDir + << " specified by data-dir option not found.\n" + << " Searching another standard directory.\n"; + + } + } + return found; +#else + dataDir=userDefinedDir; + return true; +#endif + +} + +DataDir::DataDir() +{ +} + +void DataDir::setAdditionalDataDir(const string& dir){ + additionalDataDir=dir; +} + +const string &DataDir::getDir() +{ + return dataDir; +} + +const string DataDir::getLangDefDir() +{ + return dataDir+"langDefs"+Platform::pathSeparator; +} + +const string DataDir::getThemeDir() +{ + return dataDir+"themes"+Platform::pathSeparator; +} + +const string DataDir::getIndentSchemesDir() +{ + return dataDir+"indentSchemes"+Platform::pathSeparator; +} + + +const string DataDir::getAdditionalLangDefDir() +{ + return additionalDataDir+"langDefs"+Platform::pathSeparator; +} + +const string DataDir::getAdditionalThemeDir() +{ + return additionalDataDir+"themes"+Platform::pathSeparator; +} +const string DataDir::getAdditionalIndentSchemesDir() +{ + return additionalDataDir+"indentSchemes"+Platform::pathSeparator; +} + + +const string DataDir::getHelpMsgDir() +{ + return dataDir+"helpmsg"+Platform::pathSeparator; +} + +const string DataDir::searchForLangDef(const string & 
langDef){ + if (!additionalDataDir.empty()){ + string path=getAdditionalLangDefDir()+langDef; + if (fileExists(path)){ + return path; + } + } + return getLangDefDir()+langDef; +} + +const string DataDir::searchForTheme(const string & theme){ + if (!additionalDataDir.empty()){ + string path=getAdditionalThemeDir()+theme; + if (fileExists(path)){ + return path; + } + } + return getThemeDir()+theme; +} + +const string DataDir::searchForIndentScheme(const string & scheme){ + if (!additionalDataDir.empty()){ + string path=getAdditionalIndentSchemesDir()+scheme; + if (fileExists(path)){ + return path; + } + } + return getIndentSchemesDir()+scheme; +} + + +bool DataDir::fileExists(const string&f){ + ifstream file(f.c_str()); + bool exists=!file.fail(); + file.close(); + return exists; +} +/*************************************************************************** + datadir.h - description + ------------------- + begin : Sam M� 1 2003 + copyright : (C) 2003 by Andre Simon + email : andre.simon1@gmx.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#ifndef DATADIR_H +#define DATADIR_H + +#include <string> +#include <fstream> +#include <iostream> +//#include "stringtools.h" +#include "platform_fs.h" + +using namespace std; + + /** \brief Manages access to installation directories. + + Apart from the standard installation directory, one can define additional + search paths. 
+ **/ + +class DataDir + { + string dataDir; + string additionalDataDir; + bool fileExists(const string&f); + + public: + + DataDir(); + + /** search for a valid installation directory + \param userDefinedDir Directory defined by user + \return True if directory was found */ + bool searchDataDir(const string &userDefinedDir); + + /** add another installation directory, which is added to search path + \param dir Directory defined by user */ + void setAdditionalDataDir(const string& dir); + + /** \return Data installation directory */ + const string & getDir() ; + + /** \return Location of languafe definitions */ + const string getLangDefDir() ; + + /** \return Location of themes */ + const string getThemeDir() ; + + /** \return Location of indentation schemes */ + const string getIndentSchemesDir(); + + /** \return User defined location of indentation schemes */ + const string getAdditionalIndentSchemesDir(); + + /** \return User defined location of languafe definitions */ + const string getAdditionalLangDefDir() ; + + /** \return User defined location of themes */ + const string getAdditionalThemeDir() ; + + /** \return Location of help files */ + const string getHelpMsgDir() ; + + /** \param langDef Name of language definition + \return Absolute path of definiton found in a data directory */ + const string searchForLangDef(const string & langDef); + + /** \param theme Name of colour theme file + \return Absolute path of theme found in a data directory */ + const string searchForTheme(const string & theme); + + /** \param scheme Name of indent scheme file + \return Absolute path of theme found in a data directory */ + const string searchForIndentScheme(const string & scheme); + }; + +#endif +/*************************************************************************** + documentstyle.cpp - description + ------------------- + begin : Son Nov 10 2002 + copyright : (C) 2002 by Andre Simon + email : andre.simon1@gmx.de + 
***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include "documentstyle.h" + +namespace highlight { + +DocumentStyle::DocumentStyle(const string &styleDefinitionFile) +{ + fileFound=load(styleDefinitionFile); +} +DocumentStyle::DocumentStyle():fileFound(false) +{} + +bool DocumentStyle::load(const string &styleDefinitionPath) +{ + ConfigurationReader styleConfig(styleDefinitionPath); + if (styleConfig.found()){ + fontsize = styleConfig.getParameter("fontsize"); + bgColour.setRGBValues(styleConfig.getParameter("bgcolour")); + defaultElem.set(styleConfig.getParameter("defaultcolour")); + comment.set(styleConfig.getParameter("comment")); + directive.set(styleConfig.getParameter("directive")); + str.set(styleConfig.getParameter("string")); + escapeChar.set(styleConfig.getParameter("escapechar")); + number.set(styleConfig.getParameter("number")); + dstr.set(styleConfig.getParameter("string_directive")); + line.set(styleConfig.getParameter("line")); + + + string tmpstr; + // TODO: Remove this check as soon as all themes have a brackets attribute + tmpstr=styleConfig.getParameter("symbol"); + if (tmpstr.empty()) { + tmpstr=styleConfig.getParameter("defaultcolour"); + } + symbol.set(tmpstr); + +// TODO: Remove this check as soon as all themes have a sl-comment attribute + tmpstr=styleConfig.getParameter("sl-comment"); + if (tmpstr.empty()) { + tmpstr=styleConfig.getParameter("comment"); + } + slcomment.set(tmpstr); + + string paramVal; + vector<string> paramNames=styleConfig.getParameterNames(); + + //collect 
keyword classes, save corresponding style definition + for (unsigned int i=0;i<paramNames.size();i++){ + paramVal=paramNames[i]; + if (paramVal.find("kw_class") != string::npos){ + keywordStyles.insert(make_pair(StringTools::getParantheseVal(paramVal), + new ElementStyle(styleConfig.getParameter(paramVal)))); + } + } + + fileFound = true; + } + else { + fileFound = false; + } + return fileFound; +} + +DocumentStyle::~DocumentStyle() +{ + for(KSIterator iter = keywordStyles.begin(); iter != keywordStyles.end(); iter++){ + delete (*iter).second; //remove ElementStyle* + } +} + +string& DocumentStyle::getFontSize() + { + return fontsize; + } +StyleColour& DocumentStyle::getBgColour() + { + return bgColour; + } +ElementStyle& DocumentStyle::getDefaultStyle() + { + return defaultElem; + } +ElementStyle& DocumentStyle::getCommentStyle() + { + return comment; + } +ElementStyle& DocumentStyle::getSingleLineCommentStyle() + { + return slcomment; + } + + +ElementStyle& DocumentStyle::getStringStyle() + { + return str; + } +ElementStyle& DocumentStyle::getDirectiveStringStyle() + { + return dstr; + } +ElementStyle& DocumentStyle::getEscapeCharStyle() + { + return escapeChar; + } +ElementStyle& DocumentStyle::getNumberStyle() + { + return number; + } +ElementStyle& DocumentStyle::getDirectiveStyle() + { + return directive; + } +ElementStyle& DocumentStyle::getLineStyle() + { + return line; + } +ElementStyle& DocumentStyle::getSymbolStyle() + { + return symbol; + } +bool DocumentStyle::found () const + { + return fileFound; + } +ElementStyle& DocumentStyle::getKeywordStyle(const string &className){ + if (!keywordStyles.count(className)) return defaultElem; + return *keywordStyles[className]; +} + +vector <string> DocumentStyle::getClassNames(){ + vector <string> kwClassNames; + for(KSIterator iter = keywordStyles.begin(); iter != keywordStyles.end(); iter++){ + kwClassNames.push_back( (*iter).first); + } + return kwClassNames; +} + +KeywordStyles& 
DocumentStyle::getKeywordStyles(){ + return keywordStyles; +} + +} +/*************************************************************************** + documentstyle.h - description + ------------------- + begin : Son Nov 10 2002 + copyright : (C) 2002 by Andre Simon + email : andre.simon1@gmx.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#ifndef DOCUMENTSTYLE_H +#define DOCUMENTSTYLE_H + +#include <string> +#include <iostream> +#include "configurationreader.h" +#include "elementstyle.h" +#include "stylecolour.h" + +using namespace std; + +namespace highlight { + +/** maps keyword class names and the corresponding formatting information*/ +typedef map <string, ElementStyle*> KeywordStyles; + +/** iterator for keyword styles*/ +typedef KeywordStyles::iterator KSIterator; + +/** \brief Contains information about document formatting properties. 
+ +* @author Andre Simon +*/ + +class DocumentStyle + { + private: + ElementStyle comment, slcomment, str, dstr, + escapeChar, number, directive, line, symbol; + ElementStyle defaultElem; + StyleColour bgColour; + + string fontsize; + bool fileFound; + + KeywordStyles keywordStyles; + + public: + /** Constructor + \param styleDefinitionPath Style definition path */ + DocumentStyle(const string & styleDefinitionPath); + DocumentStyle(); + ~DocumentStyle(); + + /** load sytle definition + \param styleDefinitionFile Style definition path + \return True if successfull */ + bool load(const string & styleDefinitionFile); + + /** \return class names defined in the theme file */ + vector <string> getClassNames(); + + /** \return keyword styles */ + KeywordStyles& getKeywordStyles(); + + /** \return Font size */ + string &getFontSize() ; + + /** \return Background colour*/ + StyleColour& getBgColour(); + + /** \return Style of default (unrecognized) strings */ + ElementStyle & getDefaultStyle() ; + + /** \return Comment style*/ + ElementStyle & getCommentStyle() ; + + /** \return Single line comment style*/ + ElementStyle& getSingleLineCommentStyle() ; + + /** \return Keyword style*/ + ElementStyle & getKeywordStyle() ; + + /** \return String style*/ + ElementStyle & getStringStyle() ; + + /** \return Directive line string style*/ + ElementStyle & getDirectiveStringStyle() ; + + /** \return Escape character style*/ + ElementStyle & getEscapeCharStyle() ; + + /** \return Number style*/ + ElementStyle & getNumberStyle() ; + + /** \return Directive style*/ + ElementStyle & getDirectiveStyle() ; + + /** \return Type style*/ + ElementStyle & getTypeStyle() ; + + /** \return Line number style*/ + ElementStyle & getLineStyle() ; + + /** \return Bracket style*/ + ElementStyle & getSymbolStyle() ; + + /** + \param className Name of keyword class + \return keyword style of the given className + */ + ElementStyle & getKeywordStyle(const string &className); + + /** \return True if 
language definition was found */ + bool found() const ; + }; + +} + +#endif +/*************************************************************************** + elementstyle.cpp - description + ------------------- + begin : Son Nov 10 2002 + copyright : (C) 2002 by André Simon + email : andre.simon1@gmx.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include "elementstyle.h" + +namespace highlight { + +ElementStyle::ElementStyle(StyleColour col, bool b, bool i, bool u) + : colour(col) , bold(b), italic(i), underline(u) +{} + +ElementStyle:: ElementStyle(const string & elementStyleString) + : bold(false), italic(false), underline(false) +{ + set(elementStyleString); +} + +ElementStyle::ElementStyle() + : bold(false), italic(false), underline(false) +{} + +void ElementStyle::set(const string & elementStyleString){ + + istringstream valueStream(elementStyleString.c_str()); + string r, g, b, attr; + valueStream >> r; + valueStream >> g; + valueStream >> b; + colour.setRedValue(r); + colour.setGreenValue(g); + colour.setBlueValue(b); + while ( valueStream >> attr) + { + if (attr=="italic") + { + italic = true; + } + else if (attr=="bold") + { + bold = true; + } + else if (attr=="underline") + { + underline = true; + } + } +} + +ElementStyle::~ElementStyle() +{} + +bool ElementStyle::isItalic() const +{ + return italic; +} +bool ElementStyle::isBold() const +{ + return bold; +} +bool ElementStyle::isUnderline() const +{ + return underline; +} +StyleColour ElementStyle::getColour() const +{ + return colour; +} + 
+} +/*************************************************************************** + elementstyle.h - description + ------------------- + begin : Son Nov 10 2002 + copyright : (C) 2002 by Andre Simon + email : andre.simon1@gmx.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#ifndef ELEMENTSTYLE_H +#define ELEMENTSTYLE_H + +#include <sstream> + +#include "stylecolour.h" + +using namespace std; + +namespace highlight { + +/** \brief The class stores the basic text formatting properties. + +* @author Andre Simon +*/ + +class ElementStyle { + public: + + /** Constructor + \param col Style colour + \param b Bold flag + \param i Italic flag + \param u Underline flag */ + ElementStyle(StyleColour col, bool b, bool i, bool u); + + /** Constuctor + \param elementStyleString String with fotmatting information */ + ElementStyle(const string & elementStyleString); + + ElementStyle(); + + ~ElementStyle(); + + /** initialize object + \param elementStyleString String which contains formatting attributes + */ + void set(const string & elementStyleString); + + /** \return True if italic */ + bool isItalic() const; + + /** \return True if bold */ + bool isBold() const; + + /** \return True if underline */ + bool isUnderline() const; + + /** \return Element colour */ + StyleColour getColour() const; + + private: + StyleColour colour; + bool bold, italic, underline; + }; + +} + +#endif +// +// C++ Interface: enums +// +// Description: +// +// +// Author: Andre Simon <andre.simon1@gmx.de>, (C) 2004 +// +// Copyright: See 
COPYING file that comes with this distribution +// +// + +#ifndef ENUMS_H +#define ENUMS_H + +namespace highlight { + +/** states which may occour during input file parsing*/ +enum State { + STANDARD=0, + STRING, + NUMBER, + SL_COMMENT, + ML_COMMENT_BEGIN, + ESC_CHAR, + DIRECTIVE_LINE, + DIRECTIVE_STRING, + LINENUMBER, + SYMBOL, + + // Konstanten ab hier duefen nicht mehr als Array-Indizes benutzt werden!! + KEYWORD, + ML_COMMENT_END, + DIRECTIVE_LINE_END, + TAG_BEGIN, + TAG_END, + KEYWORD_BEGIN, + KEYWORD_END, + + _UNKNOWN=100, + _EOL, + _EOF, + _WS +} ; + +/** Parser return values*/ +enum ParseError{ + PARSE_OK, + BAD_INPUT=1, + BAD_OUTPUT=2, + BAD_STYLE=4 +}; + +/** line wrapping modes*/ +enum WrapMode { + WRAP_DISABLED, + WRAP_SIMPLE, + WRAP_DEFAULT +}; + +/** language definition loading results*/ +enum LoadResult{ + LOAD_FAILED, + LOAD_NEW, + LOAD_NONE +}; + +/** output formats */ +enum OutputType { + HTML, + XHTML, + TEX, + LATEX, + RTF, + XSLFO, + XML, + ANSI +}; + +} + +#endif +/* Getopt for GNU. + NOTE: getopt is now part of the C library, so if you don't know what + "Keep this file name-space clean" means, talk to roland@gnu.ai.mit.edu + before changing it! + + Copyright (C) 1987, 88, 89, 90, 91, 92, 1993 + Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 2, or (at your option) any + later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#ifndef __STDC__ +# ifndef const +# define const +# endif +#endif + +/* This tells Alpha OSF/1 not to define a getopt prototype in <stdio.h>. */ +#ifndef _NO_PROTO +#define _NO_PROTO +#endif + +#include <cstdio> +#include <cstring> +//#include "tailor.h" + +/* Comment out all this code if we are using the GNU C Library, and are not + actually compiling the library itself. This code is part of the GNU C + Library, but also included in many other GNU distributions. Compiling + and linking in this code is a waste when using the GNU C library + (especially if it is a shared library). Rather than having every GNU + program understand `configure --with-gnu-libc' and omit the object files, + it is simpler to just do this in the source for each such file. */ + +#if defined (_LIBC) || !defined (__GNU_LIBRARY__) + + +/* This needs to come after some library #include + to get __GNU_LIBRARY__ defined. */ +#ifdef __GNU_LIBRARY__ +/* Don't include stdlib.h for non-GNU C libraries because some of them + contain conflicting prototypes for getopt. */ +#include <stdlib.h> +#endif /* GNU C library. */ + +/* If GETOPT_COMPAT is defined, `+' as well as `--' can introduce a + long-named option. Because this is not POSIX.2 compliant, it is + being phased out. */ +/* #define GETOPT_COMPAT */ + +/* This version of `getopt' appears to the caller like standard Unix `getopt' + but it behaves differently for the user, since it allows the user + to intersperse the options with the other arguments. + + As `getopt' works, it permutes the elements of ARGV so that, + when it is done, all the options precede everything else. Thus + all application programs are extended to handle flexible argument order. + + Setting the environment variable POSIXLY_CORRECT disables permutation. + Then the behavior is completely standard. 
+ + GNU application programs can use a third alternative mode in which + they can distinguish the relative order of options and other arguments. */ + +#include "getopt.h" + +/* For communication from `getopt' to the caller. + When `getopt' finds an option that takes an argument, + the argument value is returned here. + Also, when `ordering' is RETURN_IN_ORDER, + each non-option ARGV-element is returned here. */ + +char *optarg = 0; + +/* Index in ARGV of the next element to be scanned. + This is used for communication to and from the caller + and for communication between successive calls to `getopt'. + + On entry to `getopt', zero means this is the first call; initialize. + + When `getopt' returns EOF, this is the index of the first of the + non-option elements that the caller should itself scan. + + Otherwise, `optind' communicates from one call to the next + how much of ARGV has been scanned so far. */ + +/* XXX 1003.2 says this must be 1 before any call. */ +int optind = 0; + +/* The next char to be scanned in the option-element + in which the last option character we returned was found. + This allows us to pick up the scan where we left off. + + If this is zero, or a null string, it means resume the scan + by advancing to the next ARGV-element. */ + +static char *nextchar; + +/* Callers store zero here to inhibit the error message + for unrecognized options. */ + +int opterr = 1; + +/* Set to an option character which was unrecognized. + This must be initialized on some systems to avoid linking in the + system's own getopt implementation. */ + +#define BAD_OPTION '\0' +int optopt = BAD_OPTION; + +/* Describe how to deal with options that follow non-option ARGV-elements. + + If the caller did not specify anything, + the default is REQUIRE_ORDER if the environment variable + POSIXLY_CORRECT is defined, PERMUTE otherwise. + + REQUIRE_ORDER means don't recognize them as options; + stop option processing when the first non-option is seen. + This is what Unix does. 
+ This mode of operation is selected by either setting the environment + variable POSIXLY_CORRECT, or using `+' as the first character + of the list of option characters. + + PERMUTE is the default. We permute the contents of ARGV as we scan, + so that eventually all the non-options are at the end. This allows options + to be given in any order, even with programs that were not written to + expect this. + + RETURN_IN_ORDER is an option available to programs that were written + to expect options and other ARGV-elements in any order and that care about + the ordering of the two. We describe each non-option ARGV-element + as if it were the argument of an option with character code 1. + Using `-' as the first character of the list of option characters + selects this mode of operation. + + The special argument `--' forces an end of option-scanning regardless + of the value of `ordering'. In the case of RETURN_IN_ORDER, only + `--' can cause `getopt' to return EOF with `optind' != ARGC. */ + +static enum +{ + REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER +} ordering; + +#ifdef __GNU_LIBRARY__ +/* We want to avoid inclusion of string.h with non-GNU libraries + because there are many ways it can cause trouble. + On some systems, it contains special magic macros that don't work + in GCC. */ +#include <string.h> +#define my_index strchr +#define my_strlen strlen +#else + +/* Avoid depending on library functions or files + whose names are inconsistent. 
*/ + +#if __STDC__ || defined(PROTO) + #ifndef _WIN32 + // Solaris compilation fix + extern "C" { + char *getenv(const char *name); + int strncmp(const char *s1, const char *s2, int n); + } + // extern char *getenv(const char *name); + // extern int strncmp(const char *s1, const char *s2, int n); + #endif + extern int strcmp (const char *s1, const char *s2); + static int my_strlen(const char *s); + static char *my_index (const char *str, int chr); +#else + #ifndef _WIN32 + extern char *getenv (); + #endif +#endif + +static int +my_strlen (const char *str) + +{ + int n = 0; + while (*str++) + n++; + return n; +} + +static char * +my_index ( const char *str, + int chr) + +{ + while (*str) + { + if (*str == chr) + return (char *) str; + str++; + } + return 0; +} + +#endif /* GNU C library. */ + +/* Handle permutation of arguments. */ + +/* Describe the part of ARGV that contains non-options that have + been skipped. `first_nonopt' is the index in ARGV of the first of them; + `last_nonopt' is the index after the last of them. */ + +static int first_nonopt; +static int last_nonopt; + +/* Exchange two adjacent subsequences of ARGV. + One subsequence is elements [first_nonopt,last_nonopt) + which contains all the non-options that have been skipped so far. + The other is elements [last_nonopt,optind), which contains all + the options processed since those non-options were skipped. + + `first_nonopt' and `last_nonopt' are relocated so that they describe + the new indices of the non-options in ARGV after they are moved. + + To perform the swap, we first reverse the order of all elements. So + all options now come before all non options, but they are in the + wrong order. So we put back the options and non options in original + order by reversing them again. 
For example: + original input: a b c -x -y + reverse all: -y -x c b a + reverse options: -x -y c b a + reverse non options: -x -y a b c +*/ + +#if __STDC__ || defined(PROTO) +static void exchange (char **argv); +#endif + +static void +exchange (char **argv) + +{ + char *temp, **first, **last; + + /* Reverse all the elements [first_nonopt, optind) */ + first = &argv[first_nonopt]; + last = &argv[optind-1]; + while (first < last) { + temp = *first; *first = *last; *last = temp; first++; last--; + } + /* Put back the options in order */ + first = &argv[first_nonopt]; + first_nonopt += (optind - last_nonopt); + last = &argv[first_nonopt - 1]; + while (first < last) { + temp = *first; *first = *last; *last = temp; first++; last--; + } + + /* Put back the non options in order */ + first = &argv[first_nonopt]; + last_nonopt = optind; + last = &argv[last_nonopt-1]; + while (first < last) { + temp = *first; *first = *last; *last = temp; first++; last--; + } +} + +/* Scan elements of ARGV (whose length is ARGC) for option characters + given in OPTSTRING. + + If an element of ARGV starts with '-', and is not exactly "-" or "--", + then it is an option element. The characters of this element + (aside from the initial '-') are option characters. If `getopt' + is called repeatedly, it returns successively each of the option characters + from each of the option elements. + + If `getopt' finds another option character, it returns that character, + updating `optind' and `nextchar' so that the next call to `getopt' can + resume the scan with the following option character or ARGV-element. + + If there are no more option characters, `getopt' returns `EOF'. + Then `optind' is the index in ARGV of the first ARGV-element + that is not an option. (The ARGV-elements have been permuted + so that those that are not options now come last.) + + OPTSTRING is a string containing the legitimate option characters. 
+ If an option character is seen that is not listed in OPTSTRING, + return BAD_OPTION after printing an error message. If you set `opterr' to + zero, the error message is suppressed but we still return BAD_OPTION. + + If a char in OPTSTRING is followed by a colon, that means it wants an arg, + so the following text in the same ARGV-element, or the text of the following + ARGV-element, is returned in `optarg'. Two colons mean an option that + wants an optional arg; if there is text in the current ARGV-element, + it is returned in `optarg', otherwise `optarg' is set to zero. + + If OPTSTRING starts with `-' or `+', it requests different methods of + handling the non-option ARGV-elements. + See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above. + + Long-named options begin with `--' instead of `-'. + Their names may be abbreviated as long as the abbreviation is unique + or is an exact match for some defined option. If they have an + argument, it follows the option name in the same ARGV-element, separated + from the option name by a `=', or else in the next ARGV-element. + When `getopt' finds a long-named option, it returns 0 if that option's + `flag' field is nonzero, the value of the option's `val' field + if the `flag' field is zero. + + The elements of ARGV aren't really const, because we permute them. + But we pretend they're const in the prototype to be compatible + with other systems. + + LONGOPTS is a vector of `struct option' terminated by an + element containing a name which is zero. + + LONGIND returns the index in LONGOPTS of the long-named option found. + It is only valid when a long-named option has been found by the most + recent call. + + If LONG_ONLY is nonzero, '-' as well as '--' can introduce + long-named options. 
*/ + +int +_getopt_internal ( int argc, + char *const *argv, + const char *optstring, + const struct option *longopts, + int *longind, + int long_only) + +{ + int option_index; + + optarg = 0; + + /* Initialize the internal data when the first call is made. + Start processing options with ARGV-element 1 (since ARGV-element 0 + is the program name); the sequence of previously skipped + non-option ARGV-elements is empty. */ + + if (optind == 0) + { + first_nonopt = last_nonopt = optind = 1; + + nextchar = NULL; + + /* Determine how to handle the ordering of options and nonoptions. */ + + if (optstring[0] == '-') + { + ordering = RETURN_IN_ORDER; + ++optstring; + } + else if (optstring[0] == '+') + { + ordering = REQUIRE_ORDER; + ++optstring; + } + #ifndef _WIN32 + else if (getenv ("POSIXLY_CORRECT") != NULL) + ordering = REQUIRE_ORDER; + #endif + else + ordering = PERMUTE; + } + + if (nextchar == NULL || *nextchar == '\0') + { + if (ordering == PERMUTE) + { + /* If we have just processed some options following some non-options, + exchange them so that the options come first. */ + + if (first_nonopt != last_nonopt && last_nonopt != optind) + exchange ((char **) argv); + else if (last_nonopt != optind) + first_nonopt = optind; + + /* Now skip any additional non-options + and extend the range of non-options previously skipped. */ + + while (optind < argc + && (argv[optind][0] != '-' || argv[optind][1] == '\0') +#ifdef GETOPT_COMPAT + && (longopts == NULL + || argv[optind][0] != '+' || argv[optind][1] == '\0') +#endif /* GETOPT_COMPAT */ + ) + optind++; + last_nonopt = optind; + } + + /* Special ARGV-element `--' means premature end of options. + Skip it like a null option, + then exchange with previous non-options as if it were an option, + then skip everything else like a non-option. 
*/ + + if (optind != argc && !strcmp (argv[optind], "--")) + { + optind++; + + if (first_nonopt != last_nonopt && last_nonopt != optind) + exchange ((char **) argv); + else if (first_nonopt == last_nonopt) + first_nonopt = optind; + last_nonopt = argc; + + optind = argc; + } + + /* If we have done all the ARGV-elements, stop the scan + and back over any non-options that we skipped and permuted. */ + + if (optind == argc) + { + /* Set the next-arg-index to point at the non-options + that we previously skipped, so the caller will digest them. */ + if (first_nonopt != last_nonopt) + optind = first_nonopt; + return EOF; + } + + /* If we have come to a non-option and did not permute it, + either stop the scan or describe it to the caller and pass it by. */ + + if ((argv[optind][0] != '-' || argv[optind][1] == '\0') +#ifdef GETOPT_COMPAT + && (longopts == NULL + || argv[optind][0] != '+' || argv[optind][1] == '\0') +#endif /* GETOPT_COMPAT */ + ) + { + if (ordering == REQUIRE_ORDER) + return EOF; + optarg = argv[optind++]; + return 1; + } + + /* We have found another option-ARGV-element. + Start decoding its characters. */ + + nextchar = (argv[optind] + 1 + + (longopts != NULL && argv[optind][1] == '-')); + } + + if (longopts != NULL + && ((argv[optind][0] == '-' + && (argv[optind][1] == '-' || long_only)) +#ifdef GETOPT_COMPAT + || argv[optind][0] == '+' +#endif /* GETOPT_COMPAT */ + )) + { + const struct option *p; + char *s = nextchar; + int exact = 0; + int ambig = 0; + const struct option *pfound = NULL; + int indfound = 0; + + while (*s && *s != '=') + s++; + + /* Test all options for either exact match or abbreviated matches. */ + for (p = longopts, option_index = 0; p->name; + p++, option_index++) + if (!strncmp (p->name, nextchar, s - nextchar)) + { + if (s - nextchar == my_strlen (p->name)) + { + /* Exact match found. */ + pfound = p; + indfound = option_index; + exact = 1; + break; + } + else if (pfound == NULL) + { + /* First nonexact match found. 
*/ + pfound = p; + indfound = option_index; + } + else + /* Second nonexact match found. */ + ambig = 1; + } + + if (ambig && !exact) + { + if (opterr) + fprintf (stderr, "%s: option `%s' is ambiguous\n", + argv[0], argv[optind]); + nextchar += my_strlen (nextchar); + optind++; + return BAD_OPTION; + } + + if (pfound != NULL) + { + option_index = indfound; + optind++; + if (*s) + { + /* Don't test has_arg with >, because some C compilers don't + allow it to be used on enums. */ + if (pfound->has_arg) + optarg = s + 1; + else + { + if (opterr) + { + if (argv[optind - 1][1] == '-') + /* --option */ + fprintf (stderr, + "%s: option `--%s' doesn't allow an argument\n", + argv[0], pfound->name); + else + /* +option or -option */ + fprintf (stderr, + "%s: option `%c%s' doesn't allow an argument\n", + argv[0], argv[optind - 1][0], pfound->name); + } + nextchar += my_strlen (nextchar); + return BAD_OPTION; + } + } + else if (pfound->has_arg == 1) + { + if (optind < argc) + optarg = argv[optind++]; + else + { + if (opterr) + fprintf (stderr, "%s: option `%s' requires an argument\n", + argv[0], argv[optind - 1]); + nextchar += my_strlen (nextchar); + return optstring[0] == ':' ? ':' : BAD_OPTION; + } + } + nextchar += my_strlen (nextchar); + if (longind != NULL) + *longind = option_index; + if (pfound->flag) + { + *(pfound->flag) = pfound->val; + return 0; + } + return pfound->val; + } + /* Can't find it as a long option. If this is not getopt_long_only, + or the option starts with '--' or is not a valid short + option, then it's an error. + Otherwise interpret it as a short option. 
*/ + if (!long_only || argv[optind][1] == '-' +#ifdef GETOPT_COMPAT + || argv[optind][0] == '+' +#endif /* GETOPT_COMPAT */ + || my_index (optstring, *nextchar) == NULL) + { + if (opterr) + { + if (argv[optind][1] == '-') + /* --option */ + fprintf (stderr, "%s: unrecognized option `--%s'\n", + argv[0], nextchar); + else + /* +option or -option */ + fprintf (stderr, "%s: unrecognized option `%c%s'\n", + argv[0], argv[optind][0], nextchar); + } + nextchar = (char *) ""; + optind++; + return BAD_OPTION; + } + } + + /* Look at and handle the next option-character. */ + + { + char c = *nextchar++; + char *temp = my_index (optstring, c); + + /* Increment `optind' when we start to process its last character. */ + if (*nextchar == '\0') + ++optind; + + if (temp == NULL || c == ':') + { + if (opterr) + { +#if 0 + if (c < 040 || c >= 0177) + fprintf (stderr, "%s: unrecognized option, character code 0%o\n", + argv[0], c); + else + fprintf (stderr, "%s: unrecognized option `-%c'\n", argv[0], c); +#else + /* 1003.2 specifies the format of this message. */ + fprintf (stderr, "%s: illegal option -- %c\n", argv[0], c); +#endif + } + optopt = c; + return BAD_OPTION; + } + if (temp[1] == ':') + { + if (temp[2] == ':') + { + /* This is an option that accepts an argument optionally. */ + if (*nextchar != '\0') + { + optarg = nextchar; + optind++; + } + else + optarg = 0; + nextchar = NULL; + } + else + { + /* This is an option that requires an argument. */ + if (*nextchar != '\0') + { + optarg = nextchar; + /* If we end this ARGV-element by taking the rest as an arg, + we must advance to the next element now. */ + optind++; + } + else if (optind == argc) + { + if (opterr) + { +#if 0 + fprintf (stderr, "%s: option `-%c' requires an argument\n", + argv[0], c); +#else + /* 1003.2 specifies the format of this message. 
*/ + fprintf (stderr, "%s: option requires an argument -- %c\n", + argv[0], c); +#endif + } + optopt = c; + if (optstring[0] == ':') + c = ':'; + else + c = BAD_OPTION; + } + else + /* We already incremented `optind' once; + increment it again when taking next ARGV-elt as argument. */ + optarg = argv[optind++]; + nextchar = NULL; + } + } + return c; + } +} + +int +getopt ( int argc, + char *const *argv, + const char *optstring) + +{ + return _getopt_internal (argc, argv, optstring, + (const struct option *) 0, + (int *) 0, + 0); +} + +int +getopt_long ( int argc, + char *const *argv, + const char *options, + const struct option *long_options, + int *opt_index) + +{ + return _getopt_internal (argc, argv, options, long_options, opt_index, 0); +} + +#endif /* _LIBC or not __GNU_LIBRARY__. */ + +#ifdef TEST + +/* Compile with -DTEST to make an executable for use in testing + the above definition of `getopt'. */ + +int +main (argc, argv) + int argc; + char **argv; +{ + int c; + int digit_optind = 0; + + while (1) + { + int this_option_optind = optind ? optind : 1; + + c = getopt (argc, argv, "abc:d:0123456789"); + if (c == EOF) + break; + + switch (c) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + if (digit_optind != 0 && digit_optind != this_option_optind) + printf ("digits occur in two different argv-elements.\n"); + digit_optind = this_option_optind; + printf ("option %c\n", c); + break; + + case 'a': + printf ("option a\n"); + break; + + case 'b': + printf ("option b\n"); + break; + + case 'c': + printf ("option c with value `%s'\n", optarg); + break; + + case BAD_OPTION: + break; + + default: + printf ("?? getopt returned character code 0%o ??\n", c); + } + } + + if (optind < argc) + { + printf ("non-option ARGV-elements: "); + while (optind < argc) + printf ("%s ", argv[optind++]); + printf ("\n"); + } + + exit (0); +} + +#endif /* TEST */ + +/* Declarations for getopt. 
+ Copyright (C) 1989-1994, 1996-1999, 2001 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#ifndef _GETOPT_H + +#ifndef __need_getopt +# define _GETOPT_H 1 +#endif + +/* If __GNU_LIBRARY__ is not already defined, either we are being used + standalone, or this is the first header included in the source file. + If we are being used with glibc, we need to include <features.h>, but + that does not exist if we are standalone. So: if __GNU_LIBRARY__ is + not defined, include <ctype.h>, which will pull in <features.h> for us + if it's from glibc. (Why ctype.h? It's guaranteed to exist and it + doesn't flood the namespace with stuff the way some other headers do.) */ +#if !defined __GNU_LIBRARY__ +# include <ctype.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* For communication from `getopt' to the caller. + When `getopt' finds an option that takes an argument, + the argument value is returned here. + Also, when `ordering' is RETURN_IN_ORDER, + each non-option ARGV-element is returned here. */ + +extern char *optarg; + +/* Index in ARGV of the next element to be scanned. + This is used for communication to and from the caller + and for communication between successive calls to `getopt'. 
+ + On entry to `getopt', zero means this is the first call; initialize. + + When `getopt' returns -1, this is the index of the first of the + non-option elements that the caller should itself scan. + + Otherwise, `optind' communicates from one call to the next + how much of ARGV has been scanned so far. */ + +extern int optind; + +/* Callers store zero here to inhibit the error message `getopt' prints + for unrecognized options. */ + +extern int opterr; + +/* Set to an option character which was unrecognized. */ + +extern int optopt; + +#ifndef __need_getopt +/* Describe the long-named options requested by the application. + The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector + of `struct option' terminated by an element containing a name which is + zero. + + The field `has_arg' is: + no_argument (or 0) if the option does not take an argument, + required_argument (or 1) if the option requires an argument, + optional_argument (or 2) if the option takes an optional argument. + + If the field `flag' is not NULL, it points to a variable that is set + to the value given in the field `val' when the option is found, but + left unchanged if the option is not found. + + To have a long-named option do something other than set an `int' to + a compiled-in constant, such as set a value from `optarg', set the + option's `flag' field to zero and its `val' field to a nonzero + value (the equivalent single-letter option character, if there is + one). For long options that have a zero `flag' field, `getopt' + returns the contents of the `val' field. */ + +struct option +{ +# if (defined __STDC__ && __STDC__) || defined __cplusplus + const char *name; +# else + char *name; +# endif + /* has_arg can't be an enum because some compilers complain about + type mismatches in all the code that assumes it is an int. */ + int has_arg; + int *flag; + int val; +}; + +/* Names for the values of the `has_arg' field of `struct option'. 
*/ + +# define no_argument 0 +# define required_argument 1 +# define optional_argument 2 +#endif /* need getopt */ + + +/* Get definitions and prototypes for functions to process the + arguments in ARGV (ARGC of them, minus the program name) for + options given in OPTS. + + Return the option character from OPTS just read. Return -1 when + there are no more options. For unrecognized options, or options + missing arguments, `optopt' is set to the option letter, and '?' is + returned. + + The OPTS string is a list of characters which are recognized option + letters, optionally followed by colons, specifying that that letter + takes an argument, to be placed in `optarg'. + + If a letter in OPTS is followed by two colons, its argument is + optional. This behavior is specific to the GNU `getopt'. + + The argument `--' causes premature termination of argument + scanning, explicitly telling `getopt' that there are no more + options. + + If OPTS begins with `-', then non-option arguments are treated as + arguments to an option with character code 1. This behavior is + specific to the GNU `getopt'. */ + +#if (defined __STDC__ && __STDC__) || defined __cplusplus +# ifdef __GNU_LIBRARY__ +/* Many other libraries have conflicting prototypes for getopt, with + differences in the consts, in stdlib.h. To avoid compilation + errors, only prototype getopt for the GNU C library. */ +extern int getopt (int ___argc, char *const *___argv, const char *__shortopts); +# else /* not __GNU_LIBRARY__ */ +// Solaris compilation fix +//extern int getopt (); +# endif /* __GNU_LIBRARY__ */ + +# ifndef __need_getopt +extern int getopt_long (int ___argc, char *const *___argv, + const char *__shortopts, + const struct option *__longopts, int *__longind); +extern int getopt_long_only (int ___argc, char *const *___argv, + const char *__shortopts, + const struct option *__longopts, int *__longind); + +/* Internal only. Users should not call this directly. 
*/ +extern int _getopt_internal (int ___argc, char *const *___argv, + const char *__shortopts, + const struct option *__longopts, int *__longind, + int __long_only); +# endif +#else /* not __STDC__ */ +extern int getopt (); +# ifndef __need_getopt +extern int getopt_long (); +extern int getopt_long_only (); + +extern int _getopt_internal (); +# endif +#endif /* __STDC__ */ + +#ifdef __cplusplus +} +#endif + +/* Make sure we later can get all the definitions and declarations. */ +#undef __need_getopt + +#endif /* getopt.h */ +/*************************************************************************** + help.cpp - description + ------------------- + begin : Die Apr 23 2002 + copyright : (C) 2002 by André Simon + email : andre.simon1@gmx.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
* + * * + ***************************************************************************/ + +#include "help.h" + +namespace Help + { + +/** Prints the help text to the console. + \param helpFilePath path of the help file; its lines are copied to std::cout, + or an error message is written to std::cerr if the file cannot be read */ + + void printHelp(const std::string & helpFilePath) + { + std::ifstream helpFile(helpFilePath.c_str()); + std::string line; + if (helpFile){ + while (getline(helpFile, line)) + std::cout << line << "\n"; + helpFile.close(); + } + else { + std::cerr <<"highlight: Could not read "<< helpFilePath << "\n"; + } + } + +} +/*************************************************************************** + help.h - description + ------------------- + begin : Tue Apr 23 2002 + copyright : (C) 2002 by André Simon + email : andre.simon1@gmx.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
* + * * + ***************************************************************************/ + +#ifndef HELP_H +#define HELP_H + +#include <iostream> +#include <fstream> +#include <string> + +/** \brief Contains methods for printing help messages + * @author Andre Simon + */ +namespace Help + { + /** print help message to stdout */ + void printHelp(const std::string &); + } + +#endif +/*************************************************************************** + htmlcode.cpp - description + ------------------- + begin : Wed Nov 28 2001 + copyright : (C) 2001 by André Simon + email : andre.simon1@gmx.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
* + * * + ***************************************************************************/ + +#include "htmlgenerator.h" + +using namespace std; + +namespace highlight { + + +HtmlGenerator::HtmlGenerator(void) +{} + +string HtmlGenerator::formatStyleAttributes(const string & elemName, + const ElementStyle & elem) +{ + ostringstream s; + s << "."<<elemName<<"\t{ color:#" + << (elem.getColour().getHexRedValue()) + << (elem.getColour().getHexGreenValue()) + << (elem.getColour().getHexBlueValue() ) + << ( elem.isBold() ?"; font-weight:bold" :"" ) + << ( elem.isItalic() ?"; font-style:italic" :"" ) + << ( elem.isUnderline() ?"; text-decoration:underline" :"" ) + << "; }\n" ; + return s.str(); +} + +string HtmlGenerator::getOpenTag(const string& styleName ){ + return "<span class=\""+styleName+"\">"; +} + +HtmlGenerator::HtmlGenerator ( + const string &cssStyle, + const string &enc, + bool omitEnc, + bool withAnchors) + : CodeGenerator( cssStyle), + brTag("<br>"), + hrTag("<hr>"), + idAttr("name"), + fileSuffix(".html"), + encoding(enc), + omitEncoding(omitEnc), + HTML_FOOTER( + "\n</body>\n</html>\n<!--HTML generated by highlight " + HIGHLIGHT_VERSION + ", " + HIGHLIGHT_URL + "-->\n"), + attachAnchors(withAnchors) +{ + styleTagOpen.push_back(""); + styleTagOpen.push_back(getOpenTag("str")); + styleTagOpen.push_back(getOpenTag("num")); + styleTagOpen.push_back(getOpenTag("slc")); + styleTagOpen.push_back(getOpenTag("com")); + styleTagOpen.push_back(getOpenTag("esc")); + styleTagOpen.push_back(getOpenTag("dir")); + styleTagOpen.push_back(getOpenTag("dstr")); + styleTagOpen.push_back(getOpenTag("line")); + styleTagOpen.push_back(getOpenTag("sym")); + + styleTagClose.push_back(""); + for (int i=1;i<NUMBER_BUILTIN_STYLES; i++) { + styleTagClose.push_back("</span>"); + } + + /*assert (styleTagOpen.size()==styleTagClose.size()); + assert (styleTagOpen.size()==NUMBER_BUILTIN_STYLES); +*/ + newLineTag = "\n"; + spacer = " "; + styleCommentOpen="/*"; + styleCommentClose="*/"; +} + 
+/** \return CSS definitions for the body, the pre element, the builtin styles + and all keyword styles; the text is built on the first call and then + served from styleDefinitionCache */ +string HtmlGenerator::getStyleDefinition() +{ + if (styleDefinitionCache.empty()){ + ostringstream os; + os << "body.hl\t{ background-color:#" + << (docStyle.getBgColour().getHexRedValue()) + << (docStyle.getBgColour().getHexGreenValue()) + << (docStyle.getBgColour().getHexBlueValue()) + << "; }\n"; + os << "pre.hl\t{ color:#" + << (docStyle.getDefaultStyle().getColour().getHexRedValue()) + << (docStyle.getDefaultStyle().getColour().getHexGreenValue()) + << (docStyle.getDefaultStyle().getColour().getHexBlueValue() ) + << "; background-color:#" + << (docStyle.getBgColour().getHexRedValue()) + << (docStyle.getBgColour().getHexGreenValue()) + << (docStyle.getBgColour().getHexBlueValue()) + << "; font-size:" + << docStyle.getFontSize() + << "pt; font-family:Courier;}\n"; + os << formatStyleAttributes("num", docStyle.getNumberStyle()) + << formatStyleAttributes("esc", docStyle.getEscapeCharStyle()) + << formatStyleAttributes("str", docStyle.getStringStyle()) + << formatStyleAttributes("dstr", docStyle.getDirectiveStringStyle()) + << formatStyleAttributes("slc", docStyle.getSingleLineCommentStyle()) + << formatStyleAttributes("com", docStyle.getCommentStyle()) + << formatStyleAttributes("dir", docStyle.getDirectiveStyle()) + << formatStyleAttributes("sym", docStyle.getSymbolStyle()) + << formatStyleAttributes("line", docStyle.getLineStyle()); + + KeywordStyles styles = docStyle.getKeywordStyles(); + for (KSIterator it=styles.begin(); it!=styles.end(); it++){ + os << formatStyleAttributes(it->first, *(it->second)); + } + styleDefinitionCache=os.str(); + } + return styleDefinitionCache; +} + +/** \param title document title; "Source file" is used when it is empty + \return document header up to and including the opening pre tag */ +string HtmlGenerator::getHeader(const string &title) +{ + ostringstream os; + os << getHeaderStart((title.empty())?"Source file":title ); + if (langInfo.getSyntaxHighlight()) + { + if (includeStyleDef) //embed the CSS definition in the HTML <head> + { + os << "<style type=\"text/css\">\n"; + os << "<!--\n"; + os << getStyleDefinition(); + os << CodeGenerator::readUserStyleDef(); + os << 
"//-->\n"; + os << "</style>" << endl; + } + else //insert a reference to the CSS file + { + os << "<link rel=\"stylesheet\" type=\"text/css\" href=\"" + << getStyleOutputPath() + << "\"" + << ">\n"; + } + } + os << "</head>\n<body class=\"hl\">\n<pre class=\"hl\">"; + return os.str(); +} + +string HtmlGenerator::getFooter() +{ + return "</pre>" + HTML_FOOTER; +} + + +void HtmlGenerator::printBody() +{ + processRootState(); +} + + + +/** \param c character to be written to the HTML output + \return HTML-safe text for c: the characters <, >, &, " and @ are replaced + by character entities so they cannot be read as markup; every other + character is returned unchanged */ +string HtmlGenerator::maskCharacter(unsigned char c) +{ + switch (c) { + case '<' : + return "&lt;"; + break; + case '>' : + return "&gt;"; + break; + case '&' : + return "&amp;"; + break; + case '\"' : + return "&quot;"; + break; + + case '@' : + return "&#64;"; + break; + + default : + string m; + return m += c; + } +} + +void HtmlGenerator::insertLineNumber (bool insertNewLine) +{ + if (insertNewLine){ + //*out << getNewLine(); + wsBuffer += getNewLine(); + } + if (showLineNumbers) { + ostringstream numberPrefix; + if (attachAnchors) { + numberPrefix << "<a " + << idAttr + << "=\"l_" + << lineNumber + << "\">"; + } + ostringstream os; + if (lineNumberFillZeroes) os.fill('0'); + os <<setw(LINE_NUMBER_WIDTH)<<right<< lineNumber; + numberPrefix<< styleTagOpen[LINENUMBER] + << os.str() + << spacer + << styleTagClose[LINENUMBER]; + + if (attachAnchors) { + numberPrefix << "</a>"; + } + + wsBuffer += numberPrefix.str(); + } +} + +string HtmlGenerator::getHeaderStart(const string &title){ + ostringstream header; + header<< "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\">" + << "\n<html>\n<head>\n"; + if (!omitEncoding){ + header << "<meta http-equiv=\"content-type\" content=\"text/html; charset="<<encoding<<"\">\n"; + } + header << "<title>" << title <<"</title>\n"; + return header.str(); +} + +bool HtmlGenerator::printIndexFile(const vector<string> &fileList, + const string &outPath ){ + string suffix = fileSuffix; + string outFilePath = outPath + "index" + suffix; + ofstream indexfile(outFilePath.c_str()); + + if (!indexfile.fail()){ + 
string inFileName; + string inFilePath, newInFilePath; + indexfile << getHeaderStart("Source Index" ); + indexfile << "</head>\n<body>\n<h1> Source Index</h1>\n" + << hrTag + << "\n<ul>\n"; + string::size_type pos; + for (unsigned int i=0; i < fileList.size(); i++){ + pos=(fileList[i]).find_last_of(Platform::pathSeparator); + if (pos!=string::npos){ + newInFilePath = (fileList[i]).substr(0, pos+1); + } else { + newInFilePath=Platform::pathSeparator; + } + if (newInFilePath!=inFilePath){ + indexfile << "</ul>\n<h2>"; + indexfile << newInFilePath; + indexfile << "</h2>\n<ul>\n"; + inFilePath=newInFilePath; + } + inFileName = (fileList[i]).substr(pos+1); + indexfile << "<li><a href=\"" << inFileName << suffix << "\">"; + indexfile << inFileName << suffix <<"</a></li>\n"; + } + + indexfile << "</ul>\n" + << hrTag << brTag + << "<small>Generated by highlight " + << HIGHLIGHT_VERSION + << ", <a href=\"" << HIGHLIGHT_URL << "\" target=\"new\">" + << HIGHLIGHT_URL << "</a></small>"; + indexfile << HTML_FOOTER; + } else { + return false; + } + return true; +} + +string HtmlGenerator::getMatchingOpenTag(unsigned int styleID){ + return getOpenTag(langInfo.getKeywordClasses()[styleID]); + } + +string HtmlGenerator::getMatchingCloseTag(unsigned int styleID){ + return "</span>"; +} + +} +/*************************************************************************** + htmlgenerator.h - description + ------------------- + begin : Wed Nov 28 2001 + copyright : (C) 2001 by Andre Simon + email : andre.simon1@gmx.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
* + * * + ***************************************************************************/ + + +#ifndef HTMLGENERATOR_H +#define HTMLGENERATOR_H + +#include <fstream> +#include <iostream> +#include <string> +#include <sstream> + +#include "codegenerator.h" +#include "version.h" +#include "stylecolour.h" +#include "elementstyle.h" +#include "platform_fs.h" + +namespace highlight { + +/** + \brief This class generates HTML. + + It contains information about the resulting document structure (document + header and footer), the colour system, white space handling and text + formatting attributes. + +* @author Andre Simon +*/ + +class HtmlGenerator : public highlight::CodeGenerator + { + public: + + /** Constructor + \param colourTheme Name of Colour theme to use + \param enc encoding name + \param omitEnc switch to omit encoding information + \param withAnchors Test if HTML anchors should be attached to line numbers + */ + HtmlGenerator(const string &colourTheme, + const string &enc, + bool omitEnc=false, + bool withAnchors = false); + + HtmlGenerator(); + + /** Destructor*/ + virtual ~HtmlGenerator() {}; + + /** insert line number in the beginning of the new line + */ + virtual void insertLineNumber(bool insertNewLine=true); + + /** Print document header + \param title Title of the document + */ + string getHeader(const string &title); + + /** Print document body*/ + void printBody(); + + /** Print document footer*/ + string getFooter(); + + /** Print style definitions to external file + \param outFile Path of external style definition + */ + bool printExternalStyle(const string &outFile); + + /** Print index file with all input file names + \param fileList List of output file names + \param outPath Output path + */ + bool printIndexFile(const vector<string> & fileList, const string &outPath); + + protected: + + /** some strings which are similar in HTML and XHTML*/ + string brTag, hrTag, idAttr, fileSuffix; + + /** Output encoding name */ + string encoding; + + /** switch 
to omit encoding name in file header */ + bool omitEncoding; + + /** HTML footer */ + string HTML_FOOTER; + + /** caches style definition */ + string styleDefinitionCache; + + /** \return CSS definition */ + string getStyleDefinition(); + + /** \return Content of user defined style file */ + string readUserStyleDef(); + + /** \param title Dociment title + \return Start of file header */ + virtual string getHeaderStart(const string &title); + + private: + + /** \param styleName Style name + \return Opening tag of the given style + */ + string getOpenTag(const string& styleName); + + /** \return escaped character*/ + virtual string maskCharacter(unsigned char ); + + /** test if anchors should be appied to line numbers*/ + bool attachAnchors; + + /**\return text formatting attributes in HTML format */ + string formatStyleAttributes(const string & elemName, const ElementStyle & elem); + + /** \param styleID Style ID + \return Opening tag of the given style + */ + string getMatchingOpenTag(unsigned int styleID); + + /** \param styleID Style ID + \return Closing tag of the given style + */ + string getMatchingCloseTag(unsigned int styleID); + }; + +} + +#endif +/*************************************************************************** + languagedefinition.cpp - description + ------------------- + begin : Wed Nov 28 2001 + copyright : (C) 2001 by Andre imon + email : andre.simon1@gmx.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
* + * * + ***************************************************************************/ + +#include "languagedefinition.h" + +using namespace std; + +namespace highlight { + +LanguageDefinition::LanguageDefinition(): + ignoreCase(false), + disableHighlighting(false), + allowExtEscape(false), + vhdl_mode(false), + java_mode(false), + allowNestedComments(true), + fullLineComment(false), + reformatCode(false) +{} + +int LanguageDefinition::isKeyword(const string &s) +{ + if (s.length()) + { + if (keywords.count(s)){ + return keywords[s]; + } + else if (prefixes.count(s[0])){ + return prefixes[s[0]]; + } + } + return 0; +} + +bool LanguageDefinition::isPrefix(unsigned char c) +{ + return ( prefixes.count(c)); +} + +void LanguageDefinition::addSimpleSymbol(stringstream& symbolStream, + State state, + const string& paramValues ) { + istringstream valueStream(paramValues); + bool valExists=false; + string value; + while (valueStream >> value) + { + symbolStream << " " << value; + valExists = true; + } + if (valExists) + { + symbolStream << " " << state; + } +} + +void LanguageDefinition::addDelimiterSymbol(stringstream& symbolStream, + State stateBegin, State stateEnd, + const string& paramValues, + unsigned int classID) { + istringstream valueStream(paramValues); + string delimPrefix, delimSuffix; + while (valueStream>>delimPrefix){ + valueStream >> delimSuffix; + symbolStream << " "<<delimPrefix <<" " << stateBegin; + symbolStream <<" "<< delimSuffix<<" "<< stateEnd; + delimiterPrefixes.insert(make_pair(delimPrefix, classID)); + }; +} + +bool LanguageDefinition::getFlag( string& paramValue){ + return (StringTools::lowerCase(paramValue)=="true"); +} + +unsigned char LanguageDefinition::getSymbol(const string& paramValue){ + istringstream valueStream(paramValue); + unsigned char symbol; + valueStream >> symbol; + return symbol; +} + +void LanguageDefinition::addKeywords(const string &kwList, + int classID){ + istringstream valueStream(kwList); + string keyword; + while 
(valueStream >> keyword){ + keywords.insert(make_pair(keyword, classID)); + } +} + +unsigned int LanguageDefinition::generateNewKWClass(const string& newClassName){ + unsigned int newClassID=0; + bool found=false; + while (newClassID<keywordClasses.size() && !found){ + found= (newClassName==keywordClasses[newClassID++]); + } + if (!found){ + newClassID++; + keywordClasses.push_back(newClassName); + } + return newClassID; +} + +unsigned int LanguageDefinition::getDelimPrefixClassID(const string& prefix){ + if (delimiterPrefixes.count(prefix)){ + return delimiterPrefixes[prefix]; + } + return 0; +} + +bool LanguageDefinition::load(const string& langDefPath, bool clear) +{ + if (clear) reset(); + + ConfigurationReader langDef(langDefPath); + if (langDef.found()) + { + currentPath=langDefPath; + disableHighlighting=false; + string token; + stringstream symbolStrStream; + + //Stringstream zum Einlesen der Token: + istringstream valueStream; + + addDelimiterSymbol(symbolStrStream, ML_COMMENT_BEGIN, ML_COMMENT_END, + langDef.getParameter("ml_comment")); + + addSimpleSymbol(symbolStrStream, SL_COMMENT, + langDef.getParameter("sl_comment")); + + addSimpleSymbol(symbolStrStream, ESC_CHAR, + langDef.getParameter("escchar")); + + addSimpleSymbol(symbolStrStream, DIRECTIVE_LINE, + langDef.getParameter("directive")); + + addSimpleSymbol(symbolStrStream, DIRECTIVE_LINE_END, + langDef.getParameter("directiveend")); + + addSimpleSymbol(symbolStrStream, STRING, + langDef.getParameter("stringdelimiters")); + + ignoreCase=getFlag(langDef.getParameter("ignorecase")); + allowNestedComments=getFlag(langDef.getParameter("allownestedcomments")); + vhdl_mode=getFlag(langDef.getParameter("vhdl_mode")); + java_mode=getFlag(langDef.getParameter("java_mode")); + disableHighlighting=getFlag(langDef.getParameter("disablehighlighting")); + fullLineComment=getFlag(langDef.getParameter("fl_comment")); + reformatCode=getFlag(langDef.getParameter("reformatting")); + 
rawStringPrefix=getSymbol(langDef.getParameter("rawstringprefix")); + continuationChar=getSymbol(langDef.getParameter("continuationsymbol")); + allowExtEscape=getFlag(langDef.getParameter("allowextescape")); + + string paramName, className, classValue; + vector<string> paramNames=langDef.getParameterNames(); + for (unsigned int i=0;i<paramNames.size();i++){ + paramName=paramNames[i]; + className=StringTools::getParantheseVal(paramName); + classValue=langDef.getParameter(paramName); + if (paramName.find("kw_list") != string::npos ){ + addKeywords(classValue, generateNewKWClass(className)); + } + if (paramName.find("kw_prefix") != string::npos){ + prefixes.insert(make_pair(classValue[0], generateNewKWClass(className))); + } + if (paramName.find("kw_delim") != string::npos ){ + addDelimiterSymbol(symbolStrStream, KEYWORD_BEGIN, KEYWORD_END, + classValue, generateNewKWClass(className)); + } + if (paramName.find("tag_delim") != string::npos ){ + addDelimiterSymbol(symbolStrStream, TAG_BEGIN, TAG_END, + classValue, generateNewKWClass(className)); + } + } + + // zuletzt einlesen, um Probleme mit Delimitern, die Zeichen der + // Symbolliste enthalten, zu vermeiden + addSimpleSymbol(symbolStrStream, SYMBOL, langDef.getParameter("symbols")); + + valueStream.str(langDef.getParameter("allowedchars")); + while (valueStream >> token ) + { + allowedChars += token; + } + symbolString = symbolStrStream.str(); + + string fileToInclude=langDef.getParameter("include"); + if (!fileToInclude.empty()){ + string::size_type Pos = langDefPath.find_last_of(Platform::pathSeparator); + string includeLangDefPath = langDefPath.substr(0, Pos+1) + fileToInclude; + load(includeLangDefPath, false); + } + return true; + } + else + { + currentPath.clear(); + return false; + } +} + +void LanguageDefinition::reset() +{ + keywords.clear(); + keywordClasses.clear(); + delimiterPrefixes.clear();; + prefixes.clear(); + allowedChars.clear(); + ignoreCase= false; + java_mode= vhdl_mode= false; + 
allowNestedComments= reformatCode = false; + rawStringPrefix = continuationChar = '\0'; + disableHighlighting=false; + fullLineComment=false; +} + +bool LanguageDefinition::isVHDL() +{ + return vhdl_mode; +} + +bool LanguageDefinition::isJava() +{ + return java_mode; +} + +bool LanguageDefinition::allowNestedMLComments(){ + return allowNestedComments; +} + +bool LanguageDefinition::highlightingDisabled(){ + return disableHighlighting; +} + +bool LanguageDefinition::isFullLineComment(){ + return fullLineComment; +} + +bool LanguageDefinition::needsReload(const string &langDefPath){ + return currentPath!=langDefPath; +} + +bool LanguageDefinition::enableReformatting(){ + return reformatCode; +} + +const KeywordMap& LanguageDefinition::getKeywords() const{ + return keywords; +} + +string &LanguageDefinition::getSymbolString() { + return symbolString; +} + +unsigned char LanguageDefinition::getRawStringPrefix(){ + return rawStringPrefix; +} + +unsigned char LanguageDefinition::getContinuationChar(){ + return continuationChar; +} + +string &LanguageDefinition::getAllowedChars() { + return allowedChars; +} + +bool LanguageDefinition::getSyntaxHighlight() { + return !disableHighlighting; +} + +bool LanguageDefinition::isIgnoreCase() { + return ignoreCase; +} + +const vector<string>&LanguageDefinition::getKeywordClasses() const{ + return keywordClasses; +} + +bool LanguageDefinition::allowExtEscSeq() { + return allowExtEscape; +} + +} +/*************************************************************************** + languagedefinition.h - description + ------------------- + begin : Wed Nov 28 2001 + copyright : (C) 2001 by Andre Simon + email : andre.simon1@gmx.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + 
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 ***************************************************************************/

#ifndef LANGUAGEDEFINITION_H
#define LANGUAGEDEFINITION_H

#include <string>
#include <map>
#include <iostream>
#include <fstream>
#include <iterator>
#include <sstream>

#include "configurationreader.h"
//#include "stringtools.h"
#include "platform_fs.h"
#include "enums.h"


namespace highlight {

/** maps keywords to the corresponding class IDs */
typedef map <string, int> KeywordMap;

/** maps keyword prefixes to the corresponding class IDs */
typedef map <unsigned char, int> PrefixMap;

/**\brief Contains specific data of the programming language being processed.

   The load() method will only read a new language definition if the given
   file path is not equal to the path of the current language definition.

* @author Andre Simon
*/

class LanguageDefinition {

  public:

    LanguageDefinition();

    /** \return Symbol string, containing all known symbols with the
        referencing state ids */
    string &getSymbolString();

    /** \return Prefix of raw strings */
    unsigned char getRawStringPrefix();

    /** \return Continuation character */
    unsigned char getContinuationChar();

    /** \return List of characters allowed within identifiers */
    string &getAllowedChars();

    /** \return True if syntax highlighting is enabled */
    bool getSyntaxHighlight();

    /** \return True if keywords are NOT case sensitive (i.e. case is
        ignored); see the ignoreCase member below */
    bool isIgnoreCase();

    /** \param s String
        \return Class id of keyword, 0 if s is not a keyword */
    int isKeyword(const string &s);


    /** \return True if c is a member of the prefix list */
    bool isPrefix(unsigned char c);

    /** Load new language definition
        \param langDefPath Path of language definition
        \param clear Test if former data should be deleted
        \return True if successful */
    bool load(const string& langDefPath, bool clear=true);

    /** \return True if programming language is VHDL */
    bool isVHDL();

    /** \return True if programming language is Java */
    bool isJava();

    /** \return True if multi line comments may be nested */
    bool allowNestedMLComments();

    /** \return True if highlighting is disabled */
    bool highlightingDisabled();

    /** \return True if single line comments must start at column 1 */
    bool isFullLineComment();

    /** \return True if the next load() call with this path will load a new
        language definition
        \param langDefPath Path to language definition */
    bool needsReload(const string &langDefPath);

    /** \return True if current language may be reformatted (c, c++, c#, java) */
    bool enableReformatting();

    /** \return True if escape sequences are allowed outside of strings */
    bool allowExtEscSeq();

    /** \return Class ID of given keyword delimiter prefix
        \param prefix Keyword delimiter prefix */
    unsigned int getDelimPrefixClassID(const string& prefix);

    /** \return Keywords */
    const KeywordMap& getKeywords() const;

    /** \return Keyword classes */
    const vector<string>& getKeywordClasses() const;

  private:
    // string containing symbols and their IDs of the programming language
    string symbolString;

    // string with special characters that may occur in keywords
    string allowedChars;

    // path to the loaded language definition
    string currentPath;

    KeywordMap keywords;

    vector <string> keywordClasses;

    KeywordMap delimiterPrefixes;

    PrefixMap prefixes;

    // keywords are not case sensitive if set
    bool ignoreCase,
         disableHighlighting,
         allowExtEscape,

    // switch to enable VHDL workarounds
         vhdl_mode,

    // switch to enable Java workarounds
         java_mode,

    // allow nested multi line comment blocks
         allowNestedComments,

    // single line comments have to start in column 1 if set
         fullLineComment,

    // code formatting is enabled if set
         reformatCode;

    // Characters that mark a variable or a keyword (translated from the
    // original German comment); exposed through getRawStringPrefix() and
    // getContinuationChar().
    unsigned char rawStringPrefix,
                  continuationChar;

    /** resets the member variables to their default values */
    void reset();

    // add a symbol sequence to the symbolStream
    void addSimpleSymbol(stringstream& symbolStream, State state,
                         const string& paramValues );

    // add a delimiter symbol sequence to the symbolStream
    void addDelimiterSymbol(stringstream& symbolStream,
                            State stateBegin, State stateEnd,
                            const string& paramValues,
                            unsigned int classID=0);

    bool getFlag( string& paramValue);

    unsigned char getSymbol(const string& paramValue);

    // generate a unique class ID for the class name
    unsigned int generateNewKWClass(const string& newClassName);

    // add keywords to the given class
    void addKeywords(const string &kwList, int classID);

  };

}
#endif
/***************************************************************************
                          LatexCode.cpp  -  description
                             -------------------
    begin                : Mit Jul 24 2002
    copyright            : (C) 2002 by André Simon
    email                : andre.simon1@gmx.de
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.
* + * * + ***************************************************************************/ + +#include "latexgenerator.h" + +namespace highlight { + +LatexGenerator::LatexGenerator(const string &colourTheme, + bool replQuotes) + : CodeGenerator(colourTheme), + replaceQuotes(replQuotes) +{ + styleTagOpen.push_back( "\\hlstd{"); + styleTagOpen.push_back( "\\hlstr{"); + styleTagOpen.push_back( "\\hlnum{"); + styleTagOpen.push_back( "\\hlslc{"); + styleTagOpen.push_back( "\\hlcom{"); + styleTagOpen.push_back( "\\hlesc{"); + styleTagOpen.push_back( "\\hldir{"); + styleTagOpen.push_back( "\\hldstr{"); + styleTagOpen.push_back( "\\hlline{"); + styleTagOpen.push_back( "\\hlsym{"); + + for (int i=0;i<NUMBER_BUILTIN_STYLES; i++){ + styleTagClose.push_back( "}"); + } + + // avoid "Underfull \hbox (badness 10000)" warnings + newLineTag = "\\\\\n"; + longLineTag = "\\hspace*{\\fill}" + newLineTag; + + spacer = "\\ "; + + maskWs=true; + maskWsBegin = "\\hlstd{"; + maskWsEnd = "}"; + + excludeWs=true; + + styleCommentOpen="%"; +} + +LatexGenerator::LatexGenerator() +{} +LatexGenerator::~LatexGenerator() +{} + +string LatexGenerator::formatStyleAttributes(const string & elemName, + const ElementStyle &elem) +{ + ostringstream s; + s << "\\newcommand{\\hl" + << elemName + << "}[1]{\\textcolor[rgb]{" + << elem.getColour().getLatexRedValue() << "," + << elem.getColour().getLatexGreenValue() << "," + << elem.getColour().getLatexBlueValue() + << "}{"; + + if (elem.isBold()) + s << "\\bf{"; + if (elem.isItalic()) + s << "\\it{"; + + s <<"#1"; + + if (elem.isBold()) + s << "}"; + if (elem.isItalic()) + s << "}"; + + s <<"}}\n"; + return s.str(); +} + +void LatexGenerator::printBody() +{ + *out << "\\noindent\n" + << "\\ttfamily\n"; + + processRootState(); + + *out << "\\mbox{}\n" + << "\n\\normalfont\n"; +} + +string LatexGenerator::getHeader(const string & title) +{ + ostringstream os; + os << "\\documentclass{article}\n" + << "\\usepackage{color}\n" + << "\\usepackage{alltt}\n"; + + if 
(langInfo.getSyntaxHighlight()) { + if (includeStyleDef) { + os << "\n"<<getStyleDefinition(); + os << CodeGenerator::readUserStyleDef(); + } else { + os << "\n\\input {" + << getStyleOutputPath() + << "}\n"; + } + } + + os << "\n\\title{" << title << "}\n" + << "\\begin{document}\n" + << "\\pagecolor{bgcolor}\n"; + return os.str(); +} + +string LatexGenerator::getFooter() +{ + ostringstream os; + os << "\\end {document}\n" + << "(* LaTeX generated by highlight " + << HIGHLIGHT_VERSION + << ", " + << HIGHLIGHT_URL + << " *)\n"; + return os.str(); +} + +string LatexGenerator::getNewLine(){ + return (showLineNumbers)? newLineTag:longLineTag; +} + +string LatexGenerator::maskCharacter(unsigned char c) +{ + switch (c) + { + case '<' : + return "$<$"; + break; + case '>' : + return "$>$"; + break; + case '{': + case '}': + case '&': + case '$': + case '#': + case '%': + { + string m; + m ="\\"; + m += c; + return m; + } + break; + case '\"': + return (fragmentOutput && replaceQuotes)?"\\dq{}":"\""; + break; + case '_': + return "\\textunderscore "; + break; + case '^': + return "\\textasciicircum "; + break; + case '\\': + return "$\\backslash$"; + break; + case '~': + return "$\\sim$"; + break; + case '|': + return "\\textbar "; + break; + // avoid latex compilation failure if [ or * follows a line break (\\) + case '*': + case '[': + case ']': + // avoid "merging" of consecutive '-' chars when included in bold font ( \bf ) + case '-': + { + string m; + m= "{"; + m+= c; + m+= "}"; + return m; + } + break; + case ' ': + return spacer; + break; + case AUML_LC: + return "\\\"a"; + break; + case OUML_LC: + return "\\\"o"; + break; + case UUML_LC: + return "\\\"u"; + break; + case AUML_UC: + return "\\\"A"; + break; + case OUML_UC: + return "\\\"O"; + break; + case UUML_UC: + return "\\\"U"; + break; + case AACUTE_LC: + return "\\'a"; + break; + case EACUTE_LC: + return "\\'e"; + break; + case OACUTE_LC: + return "\\'o"; + break; + case UACUTE_LC: + return "\\'u"; + break; 
+ case AGRAVE_LC: + return "\\`a"; + break; + case EGRAVE_LC: + return "\\`e"; + break; + case OGRAVE_LC: + return "\\`o"; + break; + case UGRAVE_LC: + return "\\`u"; + break; + case AACUTE_UC: + return "\\'A"; + break; + case EACUTE_UC: + return "\\'E"; + break; + case OACUTE_UC: + return "\\'O"; + break; + case UACUTE_UC: + return "\\'U"; + break; + case AGRAVE_UC: + return "\\`A"; + break; + case EGRAVE_UC: + return "\\`E"; + break; + case UGRAVE_UC: + return "\\`O"; + break; + case OGRAVE_UC: + return "\\`U"; + break; + case SZLIG: + return "\\ss "; + break; + /* #ifndef _WIN32 + // skip first byte of multibyte chracters + case 195: + return string(""); + break; +#endif*/ + + default : + { + string m; + return m+=c; + } + } +} + +string LatexGenerator::getMatchingOpenTag(unsigned int styleID){ + return "\\hl"+langInfo.getKeywordClasses()[styleID]+"{"; + } + +string LatexGenerator::getMatchingCloseTag(unsigned int styleID){ + return "}"; +} + + +string LatexGenerator::getStyleDefinition() +{ + if (styleDefinitionCache.empty()){ + ostringstream os; + os << formatStyleAttributes("std", docStyle.getDefaultStyle()); + os << formatStyleAttributes("num", docStyle.getNumberStyle()); + os << formatStyleAttributes("esc", docStyle.getEscapeCharStyle()); + os << formatStyleAttributes("str", docStyle.getStringStyle()); + os << formatStyleAttributes("dstr", docStyle.getDirectiveStringStyle()); + os << formatStyleAttributes("slc", docStyle.getSingleLineCommentStyle()); + os << formatStyleAttributes("com", docStyle.getCommentStyle()); + os << formatStyleAttributes("dir", docStyle.getDirectiveStyle()); + os << formatStyleAttributes("sym", docStyle.getSymbolStyle()); + os << formatStyleAttributes("line", docStyle.getLineStyle()); + + KeywordStyles styles = docStyle.getKeywordStyles(); + for (KSIterator it=styles.begin(); it!=styles.end(); it++){ + os << formatStyleAttributes(it->first, *(it->second)); + } + os << "\\definecolor{bgcolor}{rgb}{" + << 
docStyle.getBgColour().getLatexRedValue() << "," + << docStyle.getBgColour().getLatexGreenValue() << "," + << docStyle.getBgColour().getLatexBlueValue() + << "}\n"; + os << "\\oddsidemargin -3mm\n\\textwidth 165,2truemm\n" + << "\\topmargin 0truept\n\\headheight 0truept\n" + << "\\headsep 0truept\n\\textheight 230truemm\n"; + + styleDefinitionCache=os.str(); + } + return styleDefinitionCache; +} + + +} +/*************************************************************************** + latexgenerator.h - description + ------------------- + begin : Mit Jul 24 2002 + copyright : (C) 2002 by André Simon + email : andre.simon1@gmx.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#ifndef LATEXGENERATOR_H +#define LATEXGENERATOR_H + +#include <string> +#include <iostream> +#include <sstream> + +#include "codegenerator.h" +#include "version.h" +#include "charcodes.h" + + +namespace highlight { + +/** + \brief This class generates LaTeX. + + It contains information about the resulting document structure (document + header and footer), the colour system, white space handling and text + formatting attributes. 

* @author Andre Simon
*/

class LatexGenerator : public highlight::CodeGenerator
  {
  public:

    /** Constructor
     \param colourTheme Name of colour theme to use
     \param replQuotes Test if quotes should be replaced by \dq{}
    */
    LatexGenerator(const string &colourTheme,
                   bool replQuotes=false);
    LatexGenerator();
    ~LatexGenerator();

    /** \param title Title of the document
        \return Document header
    */
    string getHeader(const string & title);

    /** \return Document footer */
    string getFooter();

    /** Prints document body */
    void printBody();

  private:

    string styleDefinitionCache;
    string longLineTag;

    /** \return escaped character */
    virtual string maskCharacter(unsigned char );

    /** \return text formatting attributes in LaTeX format */
    string formatStyleAttributes(const string & elemName,
                                 const ElementStyle & elem);

    /** test if double quotes should be replaced by \dq{} */
    bool replaceQuotes;

    string getNewLine();

    string getStyleDefinition();

    string getMatchingOpenTag(unsigned int styleID);
    string getMatchingCloseTag(unsigned int styleID);
  };

}

#endif
/***************************************************************************
                          main.cpp  -  description
                             -------------------
    begin                : Die Apr 23 22:16:35 CEST 2002
    copyright            : (C) 2002-2004 by André Simon
    email                : andre.simon1@gmx.de


   Highlight is a universal source code to HTML converter. Syntax highlighting
   is formatted by Cascading Style Sheets. It's possible to easily enhance
   highlight's parsing database.

 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.
* + * * + ***************************************************************************/ + +#include "main.h" + +using namespace std; + +void HighlightApp::printVersionInfo() +{ + cout << "\n highlight version " + << HIGHLIGHT_VERSION + << "\n Copyright (C) 2002-2005 Andre Simon <andre.simon1@gmx.de>" + << "\n\n Artistic Style Classes (1.15.3)" + << "\n Copyright (C) 1998-2002 Tal Davidson <davidsont@bigfoot.com>" + << "\n\n Dirstream Classes (0.4)" + << "\n Copyright (C) 2002-2004 Benjamin Kaufmann <hume@c-plusplus.de>" + << "\n\n This software is released under the terms of the GNU General " + << "Public License." + << "\n For more information about these matters, see the file named " + << "COPYING.\n\n"; + #ifdef USE_LOCAL_GETOPT + cout << " (Built with USE_LOCAL_GETOPT flag set.)\n"; + #endif + #ifdef HL_DATA_DIR + cout << " (HL_DATA_DIR: \"" <<HL_DATA_DIR<< "\" )\n"; + #endif +} + +void HighlightApp::printBadInstallationInfo() +{ + cerr << "highlight: Data directory not found. Bad installation or wrong " + << OPT_DATADIR << " parameter." + << "\n\nCopy the highlight files into one of the directories listed " + << "in INSTALL.\nYou may also set the data directory with " + << OPT_DATADIR << " and " << OPT_ADDDATADIR << ".\n"; +} + +bool HighlightApp::listInstalledFiles(bool showThemes) +{ + vector <string> filePaths; + string wildcard=(showThemes)? "*.style":"*.lang"; + unsigned int suffixLength=wildcard.length()-1; + + string searchDir = ((showThemes) ? dataDir.getThemeDir(): + dataDir.getLangDefDir()) + wildcard; + + bool directoryOK = Platform::getDirectoryEntries(filePaths, searchDir, true); + if (!directoryOK) { + cerr << "highlight: Could not access directory " + << searchDir + << ", aborted.\n"; + return false; + } + + cout << "\n Installed " + << ((showThemes)? 
"themes":"language definitions ") + << "(located in " + << ((showThemes)?dataDir.getThemeDir():dataDir.getLangDefDir()) + << ") :\n" + << endl; + + sort(filePaths.begin(), filePaths.end()); + string temp; + + for (unsigned int i=0;i< filePaths.size(); i++){ + if (showThemes) + temp = (filePaths[i]).substr(dataDir.getThemeDir().length()); + else + temp = (filePaths[i]).substr(dataDir.getLangDefDir().length()); + cout << " "<<temp.substr(0, temp.length()- suffixLength) << endl; + } + cout <<"\n Use name of the desired " + << ((showThemes)?"theme":"language") + << " with the --" + << ((showThemes)? OPT_STYLE : OPT_SYNTAX) + << " option.\n" << endl; + return true; +} + +void HighlightApp::printDebugInfo(highlight::LanguageDefinition &lang, + const string & langDefPath) +{ + cerr << "\nLoading language definition: " << langDefPath; + cerr << "\n\nSYMBOLS: " + << lang.getSymbolString(); + cerr << "\n\nKEYWORDS: "; + highlight::KeywordMap::iterator it; + highlight::KeywordMap keys=lang.getKeywords(); + cerr << "\n\nID Keyword \n"; + for (it=keys.begin(); it!=keys.end();it++){ + cerr << it->second + << " <- \"" + << it->first <<"\"\n"; + } + cerr <<"\n"; +} + +string HighlightApp::getFileSuffix(const string &fileName) { + size_t ptPos=fileName.rfind("."); + return (ptPos == string::npos) ? 
+ "" : fileName.substr(ptPos+1, fileName.length()); +} + +bool HighlightApp::loadMapConfig(const string& name, StringMap* map){ + string extPath=dataDir.getDir() + name + ".conf"; + ConfigurationReader config(extPath); + if (config.found() ) + { + stringstream values; + string paramName, paramVal; + for (unsigned int i=0;i<config.getParameterNames().size();i++){ + paramName = config.getParameterNames()[i]; + values.str(config.getParameter(paramName)) ; + while (values >> paramVal) { + map->insert(make_pair( paramVal, paramName)); + } + values.clear(); + } + return true; + } else { + cerr << "highlight: Configuration file "<< extPath << " not found.\n"; + return false; + } +} + + +int HighlightApp::getNumDigits(int i){ + int res=0; + while (i){ + i/=10; + ++res; + } + return res; +} + +void HighlightApp::printProgressBar(int total, int count){ + if (!total) return; + int p=100*count / total; + int numProgressItems=p/10; + cout << "\r["; + for (int i=0;i<10;i++){ + cout <<((i<numProgressItems)?"#":" "); + } + cout<< "] " <<setw(3)<<p<<"%, "<<count << " / " << total << " " <<flush; + if (p==100) { + cout << endl; + } +} + +void HighlightApp::printCurrentAction(const string&outfilePath, + int total, int count, int countWidth){ + cout << "Writing file " + << setw(countWidth)<< count + << " of " + << total + << ": " + << outfilePath + << "\n"; +} + +void HighlightApp::printIOErrorReport(unsigned int numberErrorFiles, + vector<string> & fileList, + const string &action){ + cerr << "highlight: Could not " + << action + << " file" + << ((numberErrorFiles>1)?"s":"")<<":\n"; + copy (fileList.begin(), fileList.end(), ostream_iterator<string>(cerr, "\n")); + if (fileList.size() < numberErrorFiles) { + cerr << "... 
[" + << (numberErrorFiles - fileList.size() ) + << " of " + << numberErrorFiles + << " failures not shown, use --" + << OPT_VERBOSE + << " switch to print all paths]\n"; + } +} + +string HighlightApp::analyzeShebang(const string& file){ + if (scriptShebangs.empty()) loadMapConfig("scriptre", &scriptShebangs); + ifstream inFile(file.c_str()); + string firstLine; + getline (inFile, firstLine); + return scriptShebangs[StringTools::trimRight(firstLine)]; +} + +string HighlightApp::guessFileType(const string& suffix, const string &inputFile) +{ + if (extensions.empty()) loadMapConfig("extensions", &extensions); + string fileType = (extensions.count(suffix)) ? extensions[suffix] : suffix ; + if (!fileType.empty()) return fileType; + return analyzeShebang(inputFile); +} + + +int HighlightApp::run(int argc, char**argv){ + + //get command line options + CmdLineOptions options(argc, argv); + + // set data directory path, where /langDefs and /themes reside + string highlightRootDir = Platform::getAppPath(); + + // determine highlight data directory + if (! dataDir.searchDataDir((options.dataDirGiven())? 
+ options.getDataDir(): highlightRootDir)){ + printBadInstallationInfo(); + return EXIT_FAILURE; + } + + if (options.additionalDataDirGiven()){ + dataDir.setAdditionalDataDir(options.getAdditionalDataDir()); + } + + if (options.printVersion()) { + printVersionInfo(); + return EXIT_SUCCESS; + } + + if (options.printHelp()) { + Help::printHelp(dataDir.getHelpMsgDir() + options.getHelpLang()); + return EXIT_SUCCESS; + } + + if (options.showThemes() || options.showLangdefs()) { + return listInstalledFiles(options.showThemes())?EXIT_SUCCESS:EXIT_FAILURE; + } + + // list of input files + const vector <string> inFileList=options.getInputFileNames(); + + string stylePath=dataDir.searchForTheme(options.getStyleName()); + + highlight::CodeGenerator *generator = + highlight::CodeGenerator::getInstance(options.getOutputType(), + stylePath, + options.getStyleInFilename(), + options.getStyleOutFilename(), + options.getCharSet(), + options.includeStyleDef(), + options.attachLineAnchors(), + options.replaceQuotes(), + options.fopCompatible(), + options.getNumberSpaces(), + options.getWrappingStyle(), + options.printLineNumbers(), + options.fillLineNrZeroes(), + options.fragmentOutput(), + options.omitEncodingName() ); + + assert (generator!=NULL); + + bool styleFileWanted = !options.fragmentOutput() || options.styleOutPathDefined(); + + if (!generator->styleFound() ) { + cerr << "highlight: Could not find style " + << stylePath + << ".\n"; + highlight::CodeGenerator::deleteInstance(); + return EXIT_FAILURE; + } + + if (!options.getIndentScheme().empty()){ + string indentSchemePath = + dataDir.searchForIndentScheme(options.getIndentScheme()+".indent"); + if (!generator->initIndentationScheme(indentSchemePath)){ + cerr << "highlight: Could not find indentation scheme " + << indentSchemePath + << ".\n"; + highlight::CodeGenerator::deleteInstance(); + return EXIT_FAILURE; + } + } + + string outDirectory = options.getOutDirectory(); + if (!outDirectory.empty() && !options.quietMode() 
&& !dirstr::directory_exists(outDirectory) ){ + cerr << "highlight: Output directory \"" + << outDirectory + << "\" does not exist.\n"; + return EXIT_FAILURE; + } + + bool initError=false, IOError=false; + + if ( !options.includeStyleDef() + && (styleFileWanted) + && options.formatSupportsExtStyle()) { + string cssOutFile=outDirectory + options.getStyleOutFilename(); + bool success=generator->printExternalStyle (cssOutFile); + if (!success){ + cerr << "highlight: Could not write " << cssOutFile <<".\n"; + IOError = true; + } + } + + if (options.printIndexFile()){ + bool success=generator -> printIndexFile(inFileList, outDirectory); + if (!success){ + cerr << "highlight: Could not write index file.\n"; + IOError = true; + } + } + + unsigned int fileCount=inFileList.size(), + fileCountWidth=getNumDigits(fileCount), + i=0, + numBadFormatting=0, + numBadInput=0, + numBadOutput=0; + + vector<string> badFormattedFiles, badInputFiles, badOutputFiles; + string outFilePath; + string suffix, lastSuffix; + + if (options.syntaxGiven()) { // user defined language definition, valid for all files + suffix = guessFileType(options.getLanguage()); + } + + while (i < fileCount && !initError) { + if (!options.syntaxGiven()) { // determine file type for each file + suffix = guessFileType(getFileSuffix(inFileList[i]), inFileList[i]); + } + if (suffix.empty()) { + if (!options.enableBatchMode() && !styleFileWanted) + cerr << "highlight: Undefined language definition. 
Use --" + << OPT_SYNTAX << " option.\n"; + if (!options.forceOutput()){ + initError = true; + break; + } + } + + if (suffix != lastSuffix) { + string langDefPath=dataDir.searchForLangDef(suffix+".lang"); + highlight::LoadResult loadRes= generator->initLanguage(langDefPath); + if (loadRes==highlight::LOAD_FAILED){ + cerr << "highlight: Unknown source file extension \"" + << suffix + << "\".\n"; + if (!options.forceOutput()){ + initError = true; + break; + } + } + if (options.printDebugInfo() && loadRes==highlight::LOAD_NEW){ + printDebugInfo(generator->getLanguage(), langDefPath); + } + lastSuffix = suffix; + } + + if (options.enableBatchMode()){ + string::size_type pos=(inFileList[i]).find_last_of(Platform::pathSeparator); + outFilePath = outDirectory; + outFilePath += inFileList[i].substr(pos+1); + outFilePath += options.getOutFileSuffix(); + + if (!options.quietMode()) { + if (options.printProgress()){ + printProgressBar(fileCount, i+1); + } else { + printCurrentAction(outFilePath, fileCount, i+1, fileCountWidth); + } + } + } else { + outFilePath = options.getSingleOutFilename(); + } + + highlight::ParseError error = generator->printOutput(inFileList[i], outFilePath); + if (error==highlight::BAD_INPUT){ + if (numBadInput++ < IO_ERROR_REPORT_LENGTH || options.printDebugInfo()) { + badInputFiles.push_back(inFileList[i]); + } + } else if (error==highlight::BAD_OUTPUT){ + if (numBadOutput++ < IO_ERROR_REPORT_LENGTH || options.printDebugInfo()) { + badOutputFiles.push_back(outFilePath); + } + } + if (options.formattingEnabled() && !generator->formattingIsPossible()){ + if (numBadFormatting++ < IO_ERROR_REPORT_LENGTH || options.printDebugInfo()) { + badFormattedFiles.push_back(outFilePath); + } + } + ++i; + } + + if (numBadInput){ + printIOErrorReport(numBadInput, badInputFiles, "read input"); + IOError = true; + } + if (numBadOutput){ + printIOErrorReport(numBadOutput, badOutputFiles, "write output"); + IOError = true; + } + if (numBadFormatting){ + 
printIOErrorReport(numBadFormatting, badFormattedFiles, "reformat"); + } + + highlight::CodeGenerator::deleteInstance(); + return (initError || IOError) ? EXIT_FAILURE : EXIT_SUCCESS; +} + + +int main(int argc, char **argv) { + HighlightApp app; + return app.run(argc, argv); +} +// +// C++ Interface: main +// +// Description: +// +// +// Author: Andre Simon <andre.simon1@gmx.de>, (C) 2004 +// +// Copyright: See COPYING file that comes with this distribution +// +// + +#ifndef HIGHLIGHT_APP +#define HIGHLIGHT_APP + + +#include <iostream> +#include <fstream> +#include <string> +#include <vector> +#include <map> +#include <iomanip> +#include <cassert> + +#include "./dirstream0.4/dirstream.h" +#include "cmdlineoptions.h" +#include "configurationreader.h" +#include "codegenerator.h" +#include "help.h" +#include "datadir.h" +#include "version.h" +#include "platform_fs.h" + +#define IO_ERROR_REPORT_LENGTH 5 +#define SHEBANG_CNT 12 + +typedef map<string, string> StringMap; + +/** Main application class + @author Andre Simon +*/ + +class HighlightApp { + +public: + + HighlightApp(){}; + ~HighlightApp(){}; + + /** Start application + \param argc Number of command line arguments + \param argv values of command line arguments + \return EXIT_SUCCESS or EXIT_FAILURE + */ + int run(int argc, char **argv); + +private: + + DataDir dataDir; + StringMap extensions; + StringMap scriptShebangs; + + /** print version info*/ + void printVersionInfo(); + + /** print error message*/ + void printBadInstallationInfo(); + + /** print input and output errors */ + void printIOErrorReport(unsigned int numberErrorFiles, vector<string> & fileList, const string &action); + + /** print installed files + \param showThemes Print installed themes if true, language definitions otherwise + */ + bool listInstalledFiles(bool showThemes); + + void printDebugInfo(highlight::LanguageDefinition &lang, + const string &langDefPath); + + string getFileSuffix(const string &fileName); + + string guessFileType(const 
string &suffix, const string &inputFile=""); + + int getNumDigits(int i); + + void printProgressBar(int total, int count); + void printCurrentAction(const string&outfilePath, + int total, int count, int countWidth); + + bool readInputFilePaths(vector<string> &fileList, string wildcard, + bool recursiveSearch); + + string analyzeShebang(const string& file); + bool loadMapConfig(const string& name, StringMap* map); + +}; + +#endif +// +// C++ Implementation: platform_fs +// +// Description: +// +// +// Author: André Simon <andre.simon1@gmx.de>, (C) 2004 +// +// Copyright: See COPYING file that comes with this distribution +// +// + +#include "platform_fs.h" +#include "./dirstream0.4/dirstream.h" + +#include <iostream> + +using namespace std; + +namespace Platform { + +#ifdef _WIN32 + #include <windows.h> + + const char pathSeparator = '\\'; + //const std::string pathSeparatorStr = "\\"; + + std::string getAppPath() + { + char pathAndName[MAX_PATH], path[MAX_PATH], drive[3]; + GetModuleFileName(NULL, pathAndName, MAX_PATH); + _splitpath(pathAndName, drive, path, 0, 0); + return std::string(drive)+path; + } + +#else + const char pathSeparator = '/'; + // const std::string pathSeparatorStr = "/"; + + std::string getAppPath() + { + return ""; + } + +#endif + +bool getDirectoryEntries(vector<string> &fileList, + string wildcard, + bool recursiveSearch) +{ + if (!wildcard.empty()) { + string directory_path; + string::size_type Pos = wildcard.find_last_of(pathSeparator); + if (Pos == string::npos) { + directory_path = "."; + } else { + directory_path = wildcard.substr(0, Pos + 1); + wildcard = wildcard.substr(Pos + 1); + } + + dirstr::dirstream str( directory_path.c_str(), + #ifdef USE_FN_MATCH + dirstr::pred_f(FnMatcher(wildcard.c_str(), 0)), + #else + dirstr::pattern_f(wildcard.c_str()), + #endif + (recursiveSearch)?dirstr::recursive_yes:dirstr::recursive_no); + + + for(string entry; str >> entry;) { + fileList.push_back(dirstr::full_path(entry)); + //std::cout << "Entry 
" <<entry<<"\n"; + } + } + return !(fileList.empty()); +} + +} + +// +// C++ Interface: platform_fs +// +// Description: +// +// +// Author: André Simon <andre.simon1@gmx.de>, (C) 2004 +// +// Copyright: See COPYING file that comes with this distribution +// +// +#ifndef PLATFORM_FS__H__INCLUDED +#define PLATFORM_FS__H__INCLUDED + +#include <string> +#include <iostream> +#include <vector> + +#ifdef USE_FN_MATCH + #include <fnmatch.h> +#endif + +namespace Platform +{ + extern const char pathSeparator; + //extern const std::string pathSeparatorStr; + + std::string getAppPath(); + + /** \param fileList Vector where found entries will be stored + \param wildcard Directory path and wildcard + \param recursiveSearch Test if directory should be searched recursively */ + bool getDirectoryEntries(std::vector<std::string> &fileList, + std::string wildcard, + bool recursiveSearch=false); + +#ifdef USE_FN_MATCH + struct FnMatcher + { + FnMatcher(const char* pattern, int flags) + : pattern_(pattern) + , flags_(flags) + {} + bool operator()(const std::string& e) const { + // std::cout << "pattern: "<<pattern_<< " entry: "<<e.c_str()<< " Res fn: " <<::fnmatch(pattern_, e.c_str(), FNM_PATHNAME)<< " \n"; + return ! ::fnmatch(pattern_, e.c_str(), flags_); + } + private: + const char* pattern_; + int flags_; + }; +#endif +} +#endif +/*************************************************************************** + PreFormatter.cpp - description + ------------------- + begin : Mo Jan 03 2005 + copyright : (C) 2005 by André Simon + email : andre.simon1@gmx.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
 *                                                                         *
 ***************************************************************************/

#include "preformatter.h"

namespace highlight {

/** \param wrap     Enables line wrapping
    \param replTabs Enables replacement of tabs by spaces */
PreFormatter::PreFormatter(bool wrap, bool replTabs):
    maxLineLength(80),
    index(0),
    numberSpaces(0),
    wsPrefixLength(string::npos),
    hasMore(false),
    indentAfterOpenBraces(true),
    redefineWsPrefix(false),
    wrapLines(wrap),
    replaceTabs(replTabs)
{
}

/** Default constructor: wrapping and tab replacement are both disabled. */
PreFormatter::PreFormatter():
    maxLineLength(80),
    index(0),
    numberSpaces(0),
    wsPrefixLength(string::npos),
    hasMore(false),
    indentAfterOpenBraces(true),
    redefineWsPrefix(false),
    wrapLines(false),
    replaceTabs(false)
{
}

PreFormatter::~PreFormatter()
{
}

/** \return True while getNextLine() has further pieces of the current line */
bool PreFormatter::hasMoreLines(){
  return hasMore;
}

/** \return Value of the indentAfterOpenBraces setting */
bool PreFormatter::indentCode(){
  return indentAfterOpenBraces;
}

/** Stores a new input line, optionally expanding tabs, and resets the
    wrapping state when wrapping is enabled.
    \param newLine Line to be processed */
void PreFormatter::setLine(const std::string newLine){

  line=newLine;

  // expand each tab to the next multiple of numberSpaces columns
  if (replaceTabs && numberSpaces) {
    size_t tabPos=line.find('\t');
    while (tabPos!=string::npos){
      line.replace(tabPos , 1, numberSpaces - (tabPos % numberSpaces) , ' ');
      tabPos = line.find('\t', tabPos+1);
    }
  }

  if (wrapLines){
    wsPrefix.clear();
    index=0;
    wsPrefixLength=string::npos;
    hasMore=true;
    redefineWsPrefix=false;
  }
}

/** \return The whole line when wrapping is disabled, otherwise the next
            wrapped piece of the line set by setLine() */
std::string PreFormatter::getNextLine(){

  if (!wrapLines){
    hasMore = false;
    return line;
  }

  if (!index && line.length() > maxLineLength){ // first pass...
    // if possible, align at an opening brace or an equals sign
    if (indentAfterOpenBraces){
      wsPrefixLength=line.find_first_of(INDENT_MARKERS);
    }
    // otherwise keep the indentation of the original line
    if (wsPrefixLength==string::npos || wsPrefixLength-index>maxLineLength){
      wsPrefixLength=line.find_first_not_of(WS_CHARS);
    }
    else {
      // replace wsPrefix by spaces in all newly wrapped lines
      redefineWsPrefix=true;
      // jump to the position behind the opening brace
      wsPrefixLength=line.find_first_not_of(WS_CHARS,wsPrefixLength+1);
    }

    if (wsPrefixLength!=string::npos){
      index = wsPrefixLength;
      // if the amount of whitespace at the start of the first line exceeds
      // the maximum line length, discard the whitespace
      if (wsPrefixLength>maxLineLength){
        wsPrefixLength=0;
        return string();
      }
      else{
        wsPrefix=line.substr(0, wsPrefixLength);
      }
    }
    // line contains only whitespace; discard it
    else {
      hasMore= false;
      return string();
    }
  } else {
    if (redefineWsPrefix){
      wsPrefix.clear();
      wsPrefix.append(wsPrefixLength, ' ');
    }
    redefineWsPrefix=false;
  }

  string resultString;

  // position from which a break opportunity is searched backwards
  // NOTE(review): for a line that is not longer than maxLineLength,
  // wsPrefixLength is still string::npos here (set by setLine()), so this
  // subtraction relies on unsigned wrap-around -- confirm this is intended.
  unsigned int searchEndPos = maxLineLength - wsPrefixLength;

  // emit the last piece of the line; stop parsing
  if (line.length()-index < searchEndPos) {
    hasMore=false;
    resultString=(index>0) ? wsPrefix + line.substr(index) : line.substr(index);
    return resultString;
  }

  // search for the break position
  size_t lbPos = line.find_last_of(LB_CHARS, index+searchEndPos);
  if (lbPos <= index || lbPos == string::npos) {
    // nothing found, break hard
    lbPos = index + searchEndPos;
  }
  // keep the indentation of the original line
  resultString+=wsPrefix;
  // build the new line
  resultString += line.substr(index, lbPos-index+1);

  // skip whitespace at the start of the new line, except on the first pass
  //unsigned int newIndex=StringTools::getNextNonWsPos(line,lbPos+1);
  size_t newIndex=line.find_first_not_of(WS_CHARS, lbPos+1);
  index=(newIndex!=string::npos)?newIndex:line.length();

  hasMore=index!=line.length(); // avoid returning an unnecessary empty string

  return resultString;
}

/** \param maxLineLength         Maximum length of a wrapped line
    \param indentAfterOpenBraces Test if wrapped lines should be aligned
                                 after an indent marker */
void PreFormatter::setWrappingProperties(unsigned int maxLineLength, bool indentAfterOpenBraces){
  this->maxLineLength = maxLineLength;
  this->indentAfterOpenBraces = indentAfterOpenBraces;
}

/** \param num Tab width used when tabs are replaced by spaces */
void PreFormatter::setNumberSpaces(unsigned int num){
  numberSpaces = num;
}

}
/***************************************************************************
                          PreFormatter.h  -  description
                             -------------------
    begin                : Mo Jan 03 2005
    copyright            : (C) 2005 by André Simon
    email                : andre.simon1@gmx.de
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.
* + * * + ***************************************************************************/ + +#ifndef PreFormatter_H +#define PreFormatter_H + +#define LB_CHARS " \t[](){}-+<>.:,;" +#define WS_CHARS " \n\r\t" +#define INDENT_MARKERS "{(=" + +#include <string> +#include <iostream> + +#include "stringtools.h" + +namespace highlight { + +/** \brief Class which provides intelligent line wrapping. +* @author Andre Simon +*/ + +class PreFormatter{ +public: + /** Constructor + */ + PreFormatter(bool wrap, bool replTabs); + + PreFormatter(); + + ~PreFormatter(); + + /** + \return True if current line can be wrapped again + */ + bool hasMoreLines(); + + /** + Sets new line to be wrapped + \param newline New line + */ + void setLine(const std::string newline); + + /** + The method will indent function calls and statements + \return Next line + */ + std::string getNextLine(); + + /** + \return True if lines following open braces should be indented + */ + bool indentCode(); + + /** + \param maxlength max. 
length of output lines + \param indentAfterOpenBraces set true if lines should be indented after braces + */ + void setWrappingProperties(unsigned int maxlength=80, bool indentAfterOpenBraces=true); + + /** + \param num number of spaces which replace a tab + */ + + void setNumberSpaces(unsigned int num); + + /** + \return true if preformatting is enabled + */ + bool isEnabled(){ + return wrapLines || replaceTabs; + } + +private: + + unsigned int maxLineLength; + + std::string line, wsPrefix; + unsigned int index; + unsigned int numberSpaces; + size_t wsPrefixLength; + bool hasMore, indentAfterOpenBraces; + bool redefineWsPrefix; + bool wrapLines, replaceTabs; +}; + +} + +#endif +/*************************************************************************** + rtfcode.cpp - description + ------------------- + begin : Die Jul 9 2002 + copyright : (C) 2002 by André Simon + email : andre.simon1@gmx.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
* + * * + ***************************************************************************/ + +#include "rtfgenerator.h" + +using namespace std; + +namespace highlight { + +string RtfGenerator::formatStyleAttributes( const ElementStyle & col) +{ + stringstream s; + s << "\\red"<< col.getColour().getRTFRedValue() + << "\\green"<<col.getColour().getRTFGreenValue() + << "\\blue"<<col.getColour().getRTFBlueValue() + << ";"; + + return s.str(); +} + +string RtfGenerator::getOpenTag(int styleNumber,const ElementStyle & elem) +{ + ostringstream s; + s << "{\\cf"<<(styleNumber+1)<<"{"; + if (elem.isBold()) s << "\\b "; + if (elem.isItalic()) s << "\\i "; + if (elem.isUnderline()) s << "\\ul "; + return s.str(); +} + +string RtfGenerator::getCloseTag(const ElementStyle &elem) +{ + ostringstream s; + if (elem.isBold()) s << "\\b0 "; + if (elem.isItalic()) s << "\\i0 "; + if (elem.isUnderline()) s << "\\ul0 "; + s << "}}"; + return s.str(); +} + +RtfGenerator::RtfGenerator(const string &colourTheme) + : CodeGenerator( colourTheme) +{ + styleTagOpen.push_back(getOpenTag(STANDARD, docStyle.getDefaultStyle())); + styleTagOpen.push_back(getOpenTag(STRING, docStyle.getStringStyle())); + styleTagOpen.push_back(getOpenTag(NUMBER, docStyle.getNumberStyle())); + styleTagOpen.push_back(getOpenTag(SL_COMMENT, docStyle.getSingleLineCommentStyle())); + styleTagOpen.push_back(getOpenTag(ML_COMMENT_BEGIN,docStyle.getCommentStyle())); + styleTagOpen.push_back(getOpenTag(ESC_CHAR, docStyle.getEscapeCharStyle())); + styleTagOpen.push_back(getOpenTag(DIRECTIVE_LINE, docStyle.getDirectiveStyle())); + styleTagOpen.push_back(getOpenTag(DIRECTIVE_STRING, docStyle.getDirectiveStringStyle())); + styleTagOpen.push_back(getOpenTag(LINENUMBER, docStyle.getLineStyle())); + styleTagOpen.push_back(getOpenTag(SYMBOL, docStyle.getSymbolStyle())); + + styleTagClose.push_back(getCloseTag(docStyle.getDefaultStyle())); + styleTagClose.push_back(getCloseTag(docStyle.getStringStyle())); + 
styleTagClose.push_back(getCloseTag(docStyle.getNumberStyle())); + styleTagClose.push_back(getCloseTag(docStyle.getSingleLineCommentStyle())); + styleTagClose.push_back(getCloseTag(docStyle.getCommentStyle())); + styleTagClose.push_back(getCloseTag(docStyle.getEscapeCharStyle())); + styleTagClose.push_back(getCloseTag(docStyle.getDirectiveStyle())); + styleTagClose.push_back(getCloseTag(docStyle.getDirectiveStringStyle())); + styleTagClose.push_back(getCloseTag(docStyle.getLineStyle())); + styleTagClose.push_back(getCloseTag(docStyle.getSymbolStyle())); + + newLineTag = "\\par\\pard\n"; + spacer = " "; +} + +RtfGenerator::RtfGenerator() +{} +RtfGenerator::~RtfGenerator() +{} + +string RtfGenerator::getHeader(const string & title) +{ + return string(); +} + +void RtfGenerator::printBody() +{ + *out << "{\\rtf1\\ansi\\uc0 \\deff1" + << "{\\fonttbl{\\f1\\fmodern\\fprq1\\fcharset0 Courier;}}" + << "{\\colortbl;"; + + *out << formatStyleAttributes(docStyle.getDefaultStyle()); + + *out << formatStyleAttributes(docStyle.getStringStyle()); + *out << formatStyleAttributes(docStyle.getNumberStyle()); + *out << formatStyleAttributes(docStyle.getSingleLineCommentStyle()); + + *out << formatStyleAttributes(docStyle.getCommentStyle()); + *out << formatStyleAttributes(docStyle.getEscapeCharStyle()); + *out << formatStyleAttributes(docStyle.getDirectiveStyle()); + + *out << formatStyleAttributes(docStyle.getDirectiveStringStyle()); + *out << formatStyleAttributes(docStyle.getLineStyle()); + *out << formatStyleAttributes(docStyle.getSymbolStyle()); + + /* For output formats which can refer to external styles it is more safe + to use the colour theme's keyword class names, since the language + definitions (which may change during a batch conversion) do not have to define + all keyword classes, that are needed to highlight all input files correctly. + It is ok for RTF to use the language definition's class names, because RTF + does not refer to external styles. 
+ We cannot use the theme's class names, because KSIterator returns an + alphabetically ordered list, which is not good because RTF is dependent + on the order. We access the keyword style with an ID, which is calculated + ignoring the alphabetic order. + */ + vector<string> keywordClasses = langInfo.getKeywordClasses(); + for (unsigned int i=0;i<keywordClasses.size();i++){ + *out << formatStyleAttributes(docStyle.getKeywordStyle(keywordClasses[i])); + } + + *out << "}\n{\\info }\\paperw11900\\paperh16820\\margl560\\margr560\\margt840" + << "\\margb840\\widowctrl\\ftnbj\\aenddoc\\formshade \\fet0\\sectd" + << "\\linex0\\endnhere\\plain\\f1\\fs20\n\\pard "; + processRootState(); + *out << "}}"<<endl; +} + + +string RtfGenerator::getFooter() +{ + return string(); +} + +/** Gibt RTF-Code der Sonderzeichen zurueck */ +string RtfGenerator::maskCharacter(unsigned char c) +{ + switch (c) + { + case '}' : + case '{' : + case '\\' : + { + string m; + m="\\"; + return m+=c; + } + break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + string m; + m="{"; + m+=c; + m+="}"; + return m; + } + break; + case AUML_LC: + return "\\'e4"; + break; + case OUML_LC: + return "\\'f6"; + break; + case UUML_LC: + return "\\'fc"; + break; + case AUML_UC: + return "\\'c4"; + break; + case OUML_UC: + return "\\'d6"; + break; + case UUML_UC: + return "\\'dc"; + break; + + case AACUTE_LC: + return "\\'e1"; + break; + case EACUTE_LC: + return "\\'e9"; + break; + case OACUTE_LC: + return "\\'f3"; + break; + case UACUTE_LC: + return "\\'fa"; + break; + + case AGRAVE_LC: + return "\\'e0"; + break; + case EGRAVE_LC: + return "\\'e8"; + break; + case OGRAVE_LC: + return "\\'f2"; + break; + case UGRAVE_LC: + return "\\'f9"; + break; + + case AACUTE_UC: + return "\\'c1"; + break; + case EACUTE_UC: + return "\\'c9"; + break; + case OACUTE_UC: + return "\\'d3"; + break; + case UACUTE_UC: + return "\\'da"; + break; + case 
AGRAVE_UC: + return "\\'c0"; + break; + case EGRAVE_UC: + return "\\'c8"; + break; + case OGRAVE_UC: + return "\\'d2"; + break; + case UGRAVE_UC: + return "\\'d9"; + break; + + case SZLIG: + return "\\'df"; + break; + // skip first byte of multibyte chracters + /* #ifndef _WIN32 + case 195: + return string(""); + break; +#endif*/ + + default : + { + string m; + return m += c; + } + } +} + +string RtfGenerator::getMatchingOpenTag(unsigned int styleID){ + return getOpenTag(KEYWORD+styleID, + docStyle.getKeywordStyle(langInfo.getKeywordClasses()[styleID])); +} + +string RtfGenerator::getMatchingCloseTag(unsigned int styleID){ + return getCloseTag(docStyle.getKeywordStyle(langInfo.getKeywordClasses()[styleID])); +} + + +} +/*************************************************************************** + rtfcode.h - description + ------------------- + begin : Die Jul 9 2002 + copyright : (C) 2002 by Andre Simon + email : andre.simon1@gmx.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#ifndef RTFGENERATOR_H +#define RTFGENERATOR_H + +#include <iostream> +#include <fstream> +#include <string> +#include <sstream> + +#include "codegenerator.h" +#include "charcodes.h" +#include "version.h" + +namespace highlight { + +/** + \brief This class generates RTF. + + It contains information about the resulting document structure (document + header and footer), the colour system, white space handling and text + formatting attributes. 
+ +* @author Andre Simon +*/ + +class RtfGenerator : public highlight::CodeGenerator + { + public: + + /** Constructor + \param colourTheme Name of Colour theme to use + */ + RtfGenerator( const string &colourTheme); + RtfGenerator(); + ~RtfGenerator(); + + /** prints document header + \param title Title of the document + */ + string getHeader(const string & title); + + /** Prints document footer*/ + string getFooter(); + + /** Prints document body*/ + void printBody(); + + private: + + /** \return escaped character*/ + virtual string maskCharacter(unsigned char ); + + /**\return text formatting attributes in RTF format */ + string formatStyleAttributes( const ElementStyle & col); + + /** gibt RTF-"Tags" zurueck (Farbindex+bold+kursiv)*/ + string getOpenTag(int styleNumber,const ElementStyle &); + + string getCloseTag(const ElementStyle &); + + string getMatchingOpenTag(unsigned int styleID); + string getMatchingCloseTag(unsigned int styleID); + }; + +} +#endif +/*************************************************************************** + stringtools.cpp - description + ------------------- + begin : Mon Dec 10 2001 + copyright : (C) 2001 by André Simon + email : andre.simon1@gmx.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
* + * * + ***************************************************************************/ + +#include "stringtools.h" + +#include <sstream> +#include <iostream> +#include <cctype> + +using namespace std; + +namespace StringTools + { +// Make a lowercase copy of s: +// (C) Bruce Eckel, Thinking in C++ Vol 2 + +string lowerCase(const string& s) +{ + char* buf = new char[s.length()]; + s.copy(buf, s.length()); + for(unsigned int i = 0; i < s.length(); i++) + buf[i] = tolower(buf[i]); + string r(buf, s.length()); + delete buf; + return r; +} + +int str2int(string s) +{ + istringstream os(s); + int intVal; + os >> intVal; + return intVal; +} + + bool isAlpha(unsigned char c) + { + return (isalpha(c) || c == '_'); + } + +string trimRight(const string &value) + { + string::size_type where = value.find_last_not_of(" \t\r"); + + if (where == string::npos) + // string has nothing but space + return string(); + + if (where == (value.length() - 1)) + // string has no trailing space, don't copy its contents + return value; + + return value.substr(0, where + 1); + } + +unsigned char getNextNonWs(const string &line, int index) +{ + unsigned char c; + do + { + c=line[index++]; + } + while (isspace(c)); + return c; +} + +string getParantheseVal(const string &s){ + string::size_type openPos=s.find('('); + string::size_type closePos=s.rfind(')'); + if (openPos ==string::npos || closePos==string::npos){ + return string(); + } + return s.substr(openPos+1, closePos-openPos-1); + +} + +} +/*************************************************************************** + stringtools.h - description + ------------------- + begin : Mon Dec 10 2001 + copyright : (C) 2001 by André Simon + email : andre.simon1@gmx.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public 
License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#ifndef STRINGTOOLS_H +#define STRINGTOOLS_H + +#include <string> + +using namespace std; + +/**\brief Contains methods for string manipulation + *@author Andre Simon + */ + +namespace StringTools + { + + /** \param s String + \returns lowercase string */ + string lowerCase(const string &s); + + /** \param s String + \returns Integer value */ + int str2int(string s); + + /** \return true if c is alpa or underscore */ + bool isAlpha(unsigned char c); + + /** \param value String + \return string trimmed on the left side + */ + string trimRight(const string &value); + + /** \return next character in line starting from index, which is no whitespace*/ + unsigned char getNextNonWs(const string &line, int index=0); + + /** \param s String, containing a opening and a closing paranthesis + \return value between "(", ")" */ + string getParantheseVal(const string &s); + +} + +#endif +/*************************************************************************** + stylecolour.cpp - description + ------------------- + begin : Die Nov 5 2002 + copyright : (C) 2002 by André Simon + email : andre.simon1@gmx.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
* + * * + ***************************************************************************/ + +#include "stylecolour.h" + +using std::string; + +namespace highlight { + +StyleColour::StyleColour(const string & r_hex, const string & g_hex, const string & b_hex) + : r(r_hex), g(g_hex), b(b_hex) +{} +StyleColour::StyleColour() + : r("00"), g("00"), b("00") +{} + +//Parst PArameter aus style-Datei +StyleColour::StyleColour(const string & styleColourString) +{ + setRGBValues(styleColourString); +} + +void StyleColour::setRGBValues(const string & styleColourString){ + //Stringstream zum Einlesen der Tokens: + istringstream valueStream(styleColourString.c_str()); + valueStream >> r; + valueStream >> g; + valueStream >> b; +} + +void StyleColour::setRedValue(const string & r_hex) +{ + r = r_hex; +} + +void StyleColour::setGreenValue(const string & g_hex) +{ + g = g_hex; +} + +void StyleColour::setBlueValue(const string & b_hex) +{ + b = b_hex; +} + +string& StyleColour::getHexRedValue() +{ + return r; +} +string& StyleColour::getHexGreenValue() +{ + return g; +} +string& StyleColour::getHexBlueValue() +{ + return b; +} + + +string StyleColour::getRTFRedValue() +{ + return int2str(hex2dec(r)); +} +string StyleColour::getRTFGreenValue() +{ + return int2str(hex2dec(g)); +} +string StyleColour::getRTFBlueValue() +{ + return int2str(hex2dec(b)); +} + + +string StyleColour::getLatexRedValue() +{ + return float2str((float)hex2dec(r)/255); +} +string StyleColour::getLatexGreenValue() +{ + return float2str((float)hex2dec(g)/255); +} +string StyleColour::getLatexBlueValue() +{ + return float2str((float)hex2dec(b)/255); +} + +// Konvertieren von RGB nach CYM +string StyleColour::getTexRedValue() +{ + return float2str(1-(float)hex2dec(r)/255); +} +string StyleColour::getTexGreenValue() +{ + return float2str(1-(float)hex2dec(g)/255); +} +string StyleColour::getTexBlueValue() +{ + return float2str(1-(float)hex2dec(b)/255); +} + + +string StyleColour::int2str(const int num) +{ + 
std::ostringstream outStream; + outStream << num; + + return outStream.str(); +} + +string StyleColour::float2str(const double num) +{ + std::ostringstream outStream; + outStream << ( floor ( num * 100 + .5 ) / 100); + + return outStream.str(); +} + +int StyleColour::hex2dec(const string &hexVal) +{ + + if (hexVal.length() != 2) + return 0; + + unsigned int decVal=0, koeff=16; + + for (int i=0; i<2;i++ ) + { + if ((hexVal[i] >= '0')&& (hexVal[i]<= '9' )) + { + decVal += (koeff * (hexVal[i]-'0')); + + } + if ((hexVal[i] >= 'a')&& (hexVal[i]<= 'f' )) + { + decVal +=( koeff * (hexVal[i]-87)); + } + if ((hexVal[i] >= 'A')&& (hexVal[i]<= 'F' )) + { + decVal += (koeff * (hexVal[i]-55)); + } + koeff=1; + } + return decVal; +} + +} + +/*************************************************************************** + stylecolour.h - description + ------------------- + begin : Die Nov 5 2002 + copyright : (C) 2002 by Andre Simon + email : andre.simon1@gmx.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
* + * * + ***************************************************************************/ + +#ifndef STYLECOLOUR_H +#define STYLECOLOUR_H + +#include <string> +#include <sstream> +#include <fstream> +#include <cmath> +#include <sstream> + +using namespace std; + +namespace highlight { + +/**\brief Stores colours and returns red, green and blue values in different formats +* @author Andre Simon + */ + +class StyleColour + { + public: + /** Constructor + \param r_hex Red value in hex notation + \param g_hex Blue value in hex notation + \param b_hex Green value in hex notation + */ + StyleColour(const string & r_hex, const string & g_hex, const string & b_hex); + + /** Constructor + \param styleColourString String with rgb values + */ + StyleColour(const string & styleColourString); + + StyleColour(); + ~StyleColour(){}; + + /** Sets red, green and blue values + \param styleColourString String containing colour attributes + */ + void setRGBValues(const string & styleColourString); + + /** Sets red value + \param r_hex New red value */ + void setRedValue(const string & r_hex); + + /** Sets green value + \param g_hex New green value */ + void setGreenValue(const string & g_hex); + + /** Sets blue value + \param b_hex New blue value */ + void setBlueValue(const string & b_hex); + + /** \return Red value in hex format */ + string& getHexRedValue(); + /** \return Green value in hex format */ + string& getHexGreenValue(); + /** \return Blue value in hex format */ + string& getHexBlueValue(); + + /** \return Red value in latex format */ + string getLatexRedValue(); + /** \return Green value in latex format */ + string getLatexGreenValue(); + /** \return Blue value in latex format */ + string getLatexBlueValue(); + + /** \return Red value in tex format */ + string getTexRedValue(); + /** \return Green value in tex format */ + string getTexGreenValue(); + /** \return Blue value in tex format */ + string getTexBlueValue(); + + /** \return Red value in RTF format */ + string 
getRTFRedValue(); + /** \return Green value in RTF format */ + string getRTFGreenValue(); + /** \return Blue value in RTF format */ + string getRTFBlueValue(); + + private: + string r, g, b; + string int2str(int); + string float2str(double); + int hex2dec(const string &hexVal); + }; + +} + +#endif +/*************************************************************************** + TexGenerator.cpp - description + ------------------- + begin : Mit Jul 24 2002 + copyright : (C) 2002 by André Simon + email : andre.simon1@gmx.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include "texgenerator.h" + +namespace highlight { + +TexGenerator::TexGenerator(const string &colourTheme): + CodeGenerator( colourTheme) +{ + styleTagOpen.push_back( "{\\hlstd "); + styleTagOpen.push_back( "{\\hlstr "); + styleTagOpen.push_back( "{\\hlnum "); + styleTagOpen.push_back( "{\\hlslc "); + styleTagOpen.push_back( "{\\hlcom "); + styleTagOpen.push_back( "{\\hlesc "); + styleTagOpen.push_back( "{\\hldir "); + styleTagOpen.push_back( "{\\hldstr "); + styleTagOpen.push_back( "{\\hlline "); + styleTagOpen.push_back( "{\\hlsym "); + for (int i=0;i<NUMBER_BUILTIN_STYLES; i++) { + styleTagClose.push_back( "}"); + } + + /*This makes TeX to use every par it encounters (the \\leavevmode has + no effect when TeX is in horizontal mode and when TeX is in vertical + mode, it switches it to horizontal mode).*/ + newLineTag="\\leavevmode\\par\n"; + + spacer = "\\ "; + maskWs=true; + excludeWs=true; + maskWsBegin = "{\\hlstd"; + maskWsEnd = "}"; + 
styleCommentOpen="%"; +} + +TexGenerator::TexGenerator() +{} +TexGenerator::~TexGenerator() +{} + +string TexGenerator::formatStyleAttributes(const string & elemName,const ElementStyle & elem) +{ + ostringstream s; + s << "\\def\\hl" + << elemName + << "{"; + if (elem.isBold()) + s << "\\bf"; + if (elem.isItalic()) + s << "\\it"; + s << "\\textColor{" + << (elem.getColour().getTexRedValue())<<" " + << (elem.getColour().getTexGreenValue())<<" " + << (elem.getColour().getTexBlueValue())<<" 0.0}}\n"; + return s.str(); +} + +string TexGenerator::getHeader(const string & title) +{ + ostringstream os; + + if (langInfo.getSyntaxHighlight()) { + if (includeStyleDef) { + os << "\n"<<getStyleDefinition(); + os << CodeGenerator::readUserStyleDef(); + } else { + os << "\\input " + << getStyleOutputPath() + << "\n\n"; + } + } + + return os.str(); +} + +void TexGenerator::printBody() +{ + *out << "{\n\\tt\n"; + + processRootState(); + *out << "}\n"; +} + +string TexGenerator::getFooter() +{ + ostringstream os; + os << "\\bye\n" + << "% TeX generated by Highlight " + << HIGHLIGHT_VERSION + << ", " + << HIGHLIGHT_URL + << endl; + return os.str(); +} + +string TexGenerator:: maskCharacter(unsigned char c) +{ + switch (c) + { + case '{': + case '}': + { + string m; + m = "$\\"; + m += c; + m += "$"; + return m; + } + break; + case '^': + return "{\\bf\\^{}}"; + break; + case '_': + return "\\_{}"; + break; + case '&': + case '$': + case '#': + case '%': + { + string m; + m = "\\"; + m += c; + return m; + } + break; + case '\\': + return "$\\backslash$"; + break; + case ' ': + return spacer; + break; + case '+': + case '-': + case '<': + case '>': + case '=': + { + string m; + m = "$\\mathord{"; + m += c; + m += "}$"; + return m; + } + break; + case AUML_LC: + return "\\\"a"; + break; + case OUML_LC: + return "\\\"o"; + break; + case UUML_LC: + return "\\\"u"; + break; + case AUML_UC: + return "\\\"A"; + break; + case OUML_UC: + return "\\\"O"; + break; + case UUML_UC: + return 
"\\\"U"; + break; + case AACUTE_LC: + return "\\'a"; + break; + case EACUTE_LC: + return "\\'e"; + break; + case OACUTE_LC: + return "\\'o"; + break; + case UACUTE_LC: + return "\\'u"; + break; + case AGRAVE_LC: + return "\\`a"; + break; + case EGRAVE_LC: + return "\\`e"; + break; + case OGRAVE_LC: + return "\\`o"; + break; + case UGRAVE_LC: + return "\\`u"; + break; + case AACUTE_UC: + return "\\'A"; + break; + case EACUTE_UC: + return "\\'E"; + break; + case OACUTE_UC: + return "\\'O"; + break; + case UACUTE_UC: + return "\\'U"; + break; + case AGRAVE_UC: + return "\\`A"; + break; + case EGRAVE_UC: + return "\\`E"; + break; + case UGRAVE_UC: + return "\\`O"; + break; + case OGRAVE_UC: + return "\\`U"; + break; + case SZLIG: + return "\\ss "; + break; + /* #ifndef _WIN32 + // skip first byte of multibyte chracters + case 195: + return string(""); + break; +#endif*/ + + default : + string m; + return m += c; + } +} + +string TexGenerator::getMatchingOpenTag(unsigned int styleID){ + return "{\\hl"+langInfo.getKeywordClasses()[styleID]+" "; + } + +string TexGenerator::getMatchingCloseTag(unsigned int styleID){ + return "}"; +} + + +string TexGenerator::getStyleDefinition() +{ + if (styleDefinitionCache.empty()){ + ostringstream os; + os << formatStyleAttributes("std", docStyle.getDefaultStyle()); + os << formatStyleAttributes("num", docStyle.getNumberStyle()); + os << formatStyleAttributes("esc", docStyle.getEscapeCharStyle()); + os << formatStyleAttributes("str", docStyle.getStringStyle()); + os << formatStyleAttributes("dstr", docStyle.getDirectiveStringStyle()); + os << formatStyleAttributes("slc", docStyle.getSingleLineCommentStyle()); + os << formatStyleAttributes("com", docStyle.getCommentStyle()); + os << formatStyleAttributes("dir", docStyle.getDirectiveStyle()); + os << formatStyleAttributes("line", docStyle.getLineStyle()); + os << formatStyleAttributes("sym", docStyle.getSymbolStyle()); + + KeywordStyles styles = docStyle.getKeywordStyles(); + for 
(KSIterator it=styles.begin(); it!=styles.end(); it++){ + os << formatStyleAttributes(it->first, *(it->second)); + } + + os << "% The special option is not supported by all dvi drivers\n" + << "\\special{background rgb " + << docStyle.getBgColour().getLatexRedValue() << " " + << docStyle.getBgColour().getLatexGreenValue() << " " + << docStyle.getBgColour().getLatexBlueValue() << "}"; + os << "\n\\nopagenumbers\n" + << "\\input colordvi\n"; + styleDefinitionCache=os.str(); + } + return styleDefinitionCache; +} + + +} +/*************************************************************************** + texcode.h - description + ------------------- + begin : Mit Jul 24 2002 + copyright : (C) 2002 by André Simon + email : andre.simon1@gmx.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#ifndef TEXGENERATOR_H +#define TEXGENERATOR_H + +#include <string> +#include <iostream> +#include <sstream> + +#include "charcodes.h" +#include "version.h" +#include "codegenerator.h" + + +namespace highlight { + +/** + \brief This class generates TeX. + + It contains information about the resulting document structure (document + header and footer), the colour system, white space handling and text + formatting attributes. 
+ +* @author Andre Simon +*/ + +class TexGenerator : public highlight::CodeGenerator + { + public: + + /** Constructor + \param colourTheme Name of Colour theme to use + */ + TexGenerator(const string &colourTheme); + TexGenerator(); + ~TexGenerator(); + + /** prints document header + \param title Title of the document + */ + string getHeader(const string & title); + + /** Prints document footer*/ + string getFooter(); + + /** Prints document body*/ + void printBody(); + + private: + + string styleDefinitionCache; + + string getStyleDefinition(); + + /** \return escaped character*/ + virtual string maskCharacter(unsigned char ); + + /**\return text formatting attributes in RTF format */ + string formatStyleAttributes(const string & elemName, const ElementStyle & elem); + + string getMatchingOpenTag(unsigned int styleID); + string getMatchingCloseTag(unsigned int styleID); + + }; + +} + +#endif +/*************************************************************************** + version.h - description + ------------------- + begin : Mon March 3 2003 + copyright : (C) 2003 by André Simon + email : andre.simon1@gmx.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
* + * * + ***************************************************************************/ + +#ifndef VERSION_H +#define VERSION_H + +#define HIGHLIGHT_VERSION "2.2-10" + +#define HIGHLIGHT_URL "http://www.andre-simon.de/" +#define HIGHLIGHT_EMAIL "andre.simon1@gmx.de" + +#endif +/*************************************************************************** + htmlcode.cpp - description + ------------------- + begin : Wed Nov 28 2001 + copyright : (C) 2001 by André Simon + email : andre.simon1@gmx.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include "xhtmlgenerator.h" + +using namespace std; + +namespace highlight { + +XHtmlGenerator::XHtmlGenerator(void) +{} + +XHtmlGenerator::XHtmlGenerator ( + const string &cssStyle, + const string &enc, + bool omitEnc, + bool withAnchors) + : HtmlGenerator(cssStyle, enc, omitEnc, withAnchors) +{ + fileSuffix=".xhtml"; + brTag="<br />"; + hrTag="<hr />"; + idAttr="id"; + + HTML_FOOTER= + "\n</body>\n</html>\n<!--XHTML generated by highlight " + HIGHLIGHT_VERSION + ", " + HIGHLIGHT_URL + "-->\n"; +} + +string XHtmlGenerator::getHeaderStart(const string &title){ + ostringstream header; + header << "<?xml version=\"1.0\""; + if (!omitEncoding) { + header << " encoding=\"" << encoding << "\""; + } + header << "?>\n<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\"" + << " \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">\n" + << "<html xmlns=\"http://www.w3.org/1999/xhtml\">\n" + << "<head>\n<title>" << title << "</title>\n"; + + return header.str(); +} + + +string 
XHtmlGenerator::getHeader(const string &title) +{ + ostringstream osPart1; + osPart1 << getHeaderStart((title.empty())?"Source file":title ); + + if (langInfo.getSyntaxHighlight()) + { + if (includeStyleDef) //CSS-Definition in HTML-<head> einfuegen + { + osPart1 << "<style type=\"text/css\">\n"; + osPart1 << "<![CDATA[\n"; + osPart1 << getStyleDefinition(); + osPart1 << CodeGenerator::readUserStyleDef(); + osPart1 << "]]>\n"; + osPart1 << "</style>\n"; + } + else //Referenz auf CSS-Datei einfuegen + { + osPart1 << "<link rel=\"stylesheet\" type=\"text/css\" href=\"" + << getStyleOutputPath() + << "\"" + << "/" + << ">\n"; + } + } + osPart1 << "</head>\n<body class=\"hl\">\n<pre class=\"hl\">"; + + return osPart1.str(); +} + +} +/*************************************************************************** + xhtmlgenerator.h - description + ------------------- + begin : Mo Jun 21 2004 + copyright : (C) 2004 by Andre Simon + email : andre.simon1@gmx.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + + +#ifndef XHTMLGENERATOR_H +#define XHTMLGENERATOR_H + +#include "htmlgenerator.h" + +namespace highlight { + +/** + \brief This class generates XHTML. + + It contains information about the resulting document structure (document + header and footer), the colour system, white space handling and text + formatting attributes. 
+ +* @author Andre Simon +*/ + + +class XHtmlGenerator : public highlight::HtmlGenerator + { + public: + + /** Constructor + \param colourTheme Name of Colour theme to use + \param enc encoding name + \param omitEnc switch to omit encoding information + \param withAnchors Test if HTML anchors should be attached to line numbers + */ + XHtmlGenerator(const string &colourTheme, + const string &enc, + bool omitEnc=false, + bool withAnchors = false); + + XHtmlGenerator(); + + /** Destructor*/ + virtual ~XHtmlGenerator() {}; + + private: + + /** prints document header + \param title Title of the document + */ + string getHeader(const string &title); + + string getHeaderStart(const string &title); + + }; + +} + +#endif +/*************************************************************************** + xmlcode.cpp - description + ------------------- + begin : Do 20.01.2005 + copyright : (C) 2005 by Andre Simon + email : andre.simon1@gmx.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
* + * * + ***************************************************************************/ + +#include "xmlgenerator.h" + +using namespace std; +namespace highlight { + + XmlGenerator::XmlGenerator(const string &colourTheme,const string &enc, bool omitEnc) + : CodeGenerator(colourTheme), + encoding(enc), omitEncoding(omitEnc) +{ + styleTagOpen.push_back(getOpenTag("def")); + styleTagOpen.push_back(getOpenTag("str")); + styleTagOpen.push_back(getOpenTag("num")); + styleTagOpen.push_back(getOpenTag("slc")); + styleTagOpen.push_back(getOpenTag("com")); + styleTagOpen.push_back(getOpenTag("esc")); + styleTagOpen.push_back(getOpenTag("dir")); + styleTagOpen.push_back(getOpenTag("dstr")); + styleTagOpen.push_back(getOpenTag("line")); + styleTagOpen.push_back(getOpenTag("sym")); + + styleTagClose.push_back(getCloseTag("def")); + styleTagClose.push_back(getCloseTag("str")); + styleTagClose.push_back(getCloseTag("num")); + styleTagClose.push_back(getCloseTag("slc")); + styleTagClose.push_back(getCloseTag("com")); + styleTagClose.push_back(getCloseTag("esc")); + styleTagClose.push_back(getCloseTag("dir")); + styleTagClose.push_back(getCloseTag("dstr")); + styleTagClose.push_back(getCloseTag("line")); + styleTagClose.push_back(getCloseTag("sym")); + + spacer = " "; + newLineTag = "<br />\n"; +} + +string XmlGenerator::getStyleDefinition() +{ + if (styleDefinitionCache.empty()) { + ostringstream os; + os << "\n<style>\n" + << "\t<bgcolor value=\"" + << (docStyle.getBgColour().getHexRedValue()) + << (docStyle.getBgColour().getHexGreenValue()) + << (docStyle.getBgColour().getHexBlueValue()) + << "\" />\n" + << "\t<font size=\"" + << docStyle.getFontSize() + << "\" family=\"Courier\" />\n"; + + os << formatStyleAttributes("def", docStyle.getDefaultStyle()) + << formatStyleAttributes("num", docStyle.getNumberStyle()) + << formatStyleAttributes("esc", docStyle.getEscapeCharStyle()) + << formatStyleAttributes("str", docStyle.getStringStyle()) + << formatStyleAttributes("dstr", 
docStyle.getDirectiveStringStyle()) + << formatStyleAttributes("slc", docStyle.getSingleLineCommentStyle()) + << formatStyleAttributes("com", docStyle.getCommentStyle()) + << formatStyleAttributes("dir", docStyle.getDirectiveStyle()) + << formatStyleAttributes("sym", docStyle.getSymbolStyle()) + << formatStyleAttributes("line", docStyle.getLineStyle()); + + KeywordStyles styles = docStyle.getKeywordStyles(); + for (KSIterator it=styles.begin(); it!=styles.end(); it++){ + os << formatStyleAttributes(it->first, *(it->second)); + } + os << "</style>\n"; + styleDefinitionCache=os.str(); + } + return styleDefinitionCache; +} + + +string XmlGenerator::formatStyleAttributes(const string & elemName, + const ElementStyle & elem) +{ + ostringstream s; + s << "\t<class name=\"" + << elemName + <<"\" color=\"" + << (elem.getColour().getHexRedValue()) + << (elem.getColour().getHexGreenValue()) + << (elem.getColour().getHexBlueValue() ) + << "\" bold=\"" + << ( elem.isBold() ? "true" :"false" ) + << "\" italic=\"" + << ( elem.isItalic() ? "true" :"false" ) + << "\" underline=\"" + << ( elem.isUnderline() ? 
"true" :"false" ) + << "\" />\n" ; + return s.str(); +} + + +XmlGenerator::XmlGenerator() +{} +XmlGenerator::~XmlGenerator() +{} + +string XmlGenerator::getOpenTag(const string& styleName ){ + return "<"+styleName+">"; +} + +string XmlGenerator::getCloseTag(const string& styleName ){ + return "</"+styleName+">"; +} + +string XmlGenerator::getHeader(const string & title) +{ + ostringstream header; + header << "<?xml version=\"1.0\""; + if (!omitEncoding) { + header << " encoding=\"" << encoding << "\""; + } + header << "?>\n<document>" << getStyleDefinition(); + return header.str(); +} + +void XmlGenerator::printBody() +{ + *out << "<source>\n"; + processRootState(); + *out << "</source>\n"; +} + + +string XmlGenerator::getFooter() +{ + ostringstream os; + os <<"</document>\n"; + os<< "<!-- XML generated by Highlight " + << HIGHLIGHT_VERSION + << ", " + << HIGHLIGHT_URL + <<" -->\n"; + return os.str(); +} + +string XmlGenerator::maskCharacter(unsigned char c) +{ + switch (c) + { + case '<' : + return "<"; + break; + case '>' : + return ">"; + break; + case '&' : + return "&"; + break; + case '\"' : + return """; + break; + +// skip first byte of multibyte chracters + /* #ifndef _WIN32 + case 195: + return string(""); + break; +#endif*/ + + default: + string m; + m += c; + return m; + } +} + +/*string XmlGenerator::getNewLine(){ + string nlStr; + if (currentState!=_UNKNOWN){ + nlStr+=styleTagClose[getStyleID(currentState, currentKeywordClass)]; + } + nlStr += newLineTag; + if (currentState!=_UNKNOWN){ + nlStr+=styleTagOpen[getStyleID(currentState, currentKeywordClass)]; + } + return nlStr; +} +*/ +string XmlGenerator::getMatchingOpenTag(unsigned int styleID){ + return getOpenTag(langInfo.getKeywordClasses()[styleID]); +} + +string XmlGenerator::getMatchingCloseTag(unsigned int styleID){ + return getCloseTag(langInfo.getKeywordClasses()[styleID]); +} + +} +/*************************************************************************** + xmlcode.h - description + 
------------------- + begin : Do 20.01.2005 + copyright : (C) 2005 by Andre Simon + email : andre.simon1@gmx.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#ifndef XMLGENERATOR_H +#define XMLGENERATOR_H + +#include <string> +#include <sstream> +#include <iostream> + +#include "codegenerator.h" +#include "version.h" + +namespace highlight { + +/** + \brief This class generates XML. + + It contains information about the resulting document structure (document + header and footer), the colour system, white space handling and text + formatting attributes. 
+ +* @author Andre Simon +*/ + +class XmlGenerator : public highlight::CodeGenerator + { + public: + + /** Constructor + \param colourTheme Name of Colour theme to use + \param enc encoding name + \param omitEnc switch to omit encoding information + */ + XmlGenerator( const string &colourTheme,const string &enc, bool omitEnc=false); + + XmlGenerator(); + + ~XmlGenerator(); + + /** prints document header + \param title Title of the document + */ + string getHeader(const string & title); + + /** Prints document footer*/ + string getFooter(); + + /** Prints document body*/ + void printBody(); + + private: + + string styleDefinitionCache, encoding; + + bool omitEncoding; + + string getStyleDefinition(); + + string formatStyleAttributes(const string &, const ElementStyle &); + + /** \return escaped character*/ + virtual string maskCharacter(unsigned char ); + + +// string getNewLine(); + + string getOpenTag(const string& ); + string getCloseTag(const string& ); + + string getMatchingOpenTag(unsigned int styleID); + string getMatchingCloseTag(unsigned int styleID); + }; + +} + +#endif +/*************************************************************************** + xslfocode.cpp - description + ------------------- + begin : Do 11.12.2003 + copyright : (C) 2003 by André Simon + email : andre.simon1@gmx.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
* + * * + ***************************************************************************/ + +#include "xslfogenerator.h" + +using namespace std; +namespace highlight { + +XslFoGenerator::XslFoGenerator(const string &colourTheme, + const string &enc, + bool omitEnc, + bool fopCompatible) + : CodeGenerator(colourTheme), + encoding(enc), + fopOutput(fopCompatible), + omitEncoding(omitEnc) +{ + styleTagOpen.push_back( getOpenTag(docStyle.getDefaultStyle())); + styleTagOpen.push_back( getOpenTag(docStyle.getStringStyle())); + styleTagOpen.push_back( getOpenTag(docStyle.getNumberStyle())); + styleTagOpen.push_back( getOpenTag(docStyle.getSingleLineCommentStyle())); + styleTagOpen.push_back( getOpenTag(docStyle.getCommentStyle())); + styleTagOpen.push_back( getOpenTag(docStyle.getEscapeCharStyle())); + styleTagOpen.push_back( getOpenTag(docStyle.getDirectiveStyle())); + styleTagOpen.push_back( getOpenTag(docStyle.getDirectiveStringStyle())); + styleTagOpen.push_back( getOpenTag(docStyle.getLineStyle())); + styleTagOpen.push_back( getOpenTag(docStyle.getSymbolStyle())); + snl << " <fo:block font-size=\"" + << docStyle.getFontSize() + << "pt\" font-family=\"Courier\" white-space-collapse=\"false\" " + << "wrap-option=\"wrap\" line-height=\"12pt\" background-color=\"#" + << (docStyle.getBgColour().getHexRedValue()) + << (docStyle.getBgColour().getHexGreenValue()) + << (docStyle.getBgColour().getHexBlueValue()) + << "\">"; + + for (int i=0;i<NUMBER_BUILTIN_STYLES; i++) + { + styleTagClose.push_back( "</fo:inline>"); + } + if (fopOutput) + newLineTag ="</fo:block>\n<fo:block>"; + else + newLineTag ="</fo:block>\n"+ snl.str(); + + spacer = " "; +} + +XslFoGenerator::XslFoGenerator() +{} +XslFoGenerator::~XslFoGenerator() +{} + +string XslFoGenerator::getOpenTag(const ElementStyle &elem) +{ + ostringstream s; + s << "<fo:inline color=\"#" + << (elem.getColour().getHexRedValue()) + << (elem.getColour().getHexGreenValue()) + << (elem.getColour().getHexBlueValue()) + << "\""; + s << ( 
elem.isBold() ?" font-weight=\"bold\"" :"" ) + << ( elem.isItalic() ?" font-style=\"italic\"" :"" ) + << ( elem.isUnderline() ?" text-decoration=\"underline\"" :"" ); + s << ">"; + return s.str(); +} + +string XslFoGenerator::getHeader(const string & title) +{ + ostringstream os; + os << "<?xml version=\"1.0\""; + if (!omitEncoding) { + os << " encoding=\"" << encoding << "\""; + } + os << "?>\n<fo:root xmlns:fo=\"http://www.w3.org/1999/XSL/Format\">\n" + << "<fo:layout-master-set>\n" + << "<fo:simple-page-master master-name=\"DINA4\"\n" + << " page-height=\"29.7cm\"\n" + << " page-width=\"21cm\"\n" + << " margin-top=\"1cm\"\n" + << " margin-bottom=\"2cm\"\n" + << " margin-left=\"2.5cm\"\n" + << " margin-right=\"2.5cm\">\n" + << "<fo:region-body />\n" + << "</fo:simple-page-master>\n" + << "<fo:page-sequence-master master-name=\"basic\">\n" + << "<fo:repeatable-page-master-alternatives>\n" + << "<fo:conditional-page-master-reference master-reference=\"DINA4\" />\n" + << "</fo:repeatable-page-master-alternatives>\n" + << "</fo:page-sequence-master>\n" + << "</fo:layout-master-set>\n\n" + << "<fo:page-sequence master-reference=\"basic\">\n" + << " <fo:flow flow-name=\"xsl-region-body\">\n"; + if (fopOutput) + os << snl.str()<< "<fo:block>"; + else + os << snl.str(); + + return os.str(); +} + +/** gibt RTF-Text aus */ +void XslFoGenerator::printBody() +{ + processRootState(); +} + + +string XslFoGenerator::getFooter() +{ + ostringstream os; + if (fopOutput) + os <<"\n</fo:block>"; + os <<"\n</fo:block>\n </fo:flow>\n</fo:page-sequence>\n</fo:root>"<<endl + << "<!-- XSL-FO generated by Highlight " + << HIGHLIGHT_VERSION + << ", " + << HIGHLIGHT_URL + <<" -->\n"; + return os.str(); +} + +/** Gibt RTF-Code der Sonderzeichen zurueck */ +string XslFoGenerator::maskCharacter(unsigned char c) +{ + switch (c) + { + case '<' : + return "<"; + break; + case '>' : + return ">"; + break; + case '&' : + return "&"; + break; + case '\"' : + return """; + break; + +// skip first 
byte of multibyte chracters + /*#ifndef _WIN32 + case 195: + return string(""); + break; +#endif*/ + + default: + string m; + m += c; + return m; + } +} + +/*string XslFoGenerator::getNewLine(){ + string nlStr; + + if (currentState!=_UNKNOWN){ + nlStr+=styleTagClose[getStyleID(currentState, currentKeywordClass)]; +} + nlStr += newLineTag; + if (currentState!=_UNKNOWN){ + nlStr+=styleTagOpen[getStyleID(currentState, currentKeywordClass)]; +} + return nlStr; +}*/ + +string XslFoGenerator::getMatchingOpenTag(unsigned int styleID){ + return getOpenTag(docStyle.getKeywordStyle(langInfo.getKeywordClasses()[styleID])); +} + +string XslFoGenerator::getMatchingCloseTag(unsigned int styleID){ + return "</fo:inline>"; +} + +} +/*************************************************************************** + xslfocode.h - description + ------------------- + begin : Do 11.12.2003 + copyright : (C) 2003 by Andre Simon + email : andre.simon1@gmx.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#ifndef XSLFOGENERATOR_H +#define XSLFOGENERATOR_H + + +#include <string> +#include <sstream> +#include <iostream> +#include <fstream> + +#include "codegenerator.h" +#include "version.h" + +namespace highlight { + +/** + \brief This class generates XSL-FO. + + It contains information about the resulting document structure (document + header and footer), the colour system, white space handling and text + formatting attributes. 
+ +* @author Andre Simon +*/ + +class XslFoGenerator : public highlight::CodeGenerator + { + public: + + /** Constructor + \param colourTheme Name of Colour theme to use + \param enc encoding name + \param omitEnc switch to omit encoding information + \param fopCompatible Test if output should be compatible with Apache FOP 0.20.5 + */ + XslFoGenerator( const string &colourTheme, + const string &enc, + bool omitEnc=false, + bool fopCompatible=false); + + XslFoGenerator(); + + ~XslFoGenerator(); + + /** prints document header + \param title Title of the document + */ + string getHeader(const string & title); + + /** Prints document footer*/ + string getFooter(); + + /** Prints document body*/ + void printBody(); + + private: + ostringstream snl; + + string styleDefinition, encoding; + bool fopOutput, omitEncoding; + + /** \return escaped character*/ + virtual string maskCharacter(unsigned char ); + + string getOpenTag(const ElementStyle &); + + // string getNewLine(); + + string getMatchingOpenTag(unsigned int styleID); + string getMatchingCloseTag(unsigned int styleID); + }; + +} + +#endif diff --git a/bench/example.delphi b/bench/example.delphi new file mode 100644 index 0000000..8670459 --- /dev/null +++ b/bench/example.delphi @@ -0,0 +1,2708 @@ +// vim:ft=pascal
+
+unit YTools;
+
+{===============================================================================
+
+ cYcnus.YTools 1.0.3 Beta for Delphi 4+
+ by licenser and Murphy
+
+ ©2000-2003 by cYcnus
+ visit www.cYcnus.de
+
+ licenser@cYcnus.de (Heinz N. Gies)
+ murphy@cYcnus.de (Kornelius Kalnbach)
+
+ this unit is published under the terms of the GPL
+
+===============================================================================}
+
+interface
+
+uses
+ Windows, SysUtils, Classes, YTypes;
+
+const
+ BackSpace = #8;
+ Tab = #9;
+ LF = #10; //Line Feed
+ CR = #13; //Carriage Return
+ Space = #32;
+ EOLChars = [CR, LF];
+{$IFNDEF VER140}
+ sLineBreak = #13#10;
+ SwitchChars = ['/', '-'];
+{$ENDIF}
+ EOL = sLineBreak;
+ MaxCard = High(Cardinal);
+ AllChars = [#0..#255];
+ Alphabetical = ['A'..'Z', 'a'..'z'];
+ DecimalChars = ['0'..'9'];
+ AlphaNumerical = Alphabetical + DecimalChars;
+ StrangeChars = [#0..#31, #127, #129, #141..#144, #157, #158];
+
+ HexadecimalChars = DecimalChars + ['A'..'F', 'a'..'f'];
+ OctalChars = ['0'..'7'];
+ BinaryChars = ['0', '1'];
+
+ QuoteChars = ['''', '"'];
+ WildCards = ['*', '?'];
+ FileNameEnemies = WildCards + ['\', '/', ':', '<', '>', '|'];
+
+ HexChar: array[THex] of Char = (
+ '0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F');
+ LowerHexChar: array[THex] of Char = (
+ '0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f');
+ BaseNChar: array[TBaseN] of Char = (
+ '0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F','G','H',
+ 'I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z');
+
+ cYcnusOverlayColor = $050001;
+
+ faFindEveryFile = faReadOnly + faHidden + faSysFile + faArchive;
+
+ platWin9x = [VER_PLATFORM_WIN32s, VER_PLATFORM_WIN32_WINDOWS];
+
+
+{ Debugging }
+procedure ClearReport(const ReportName: string);
+procedure Report(const ReportName, Text: string);
+procedure ReportFmt(const ReportName, Fmt: string; const Args: array of const);
+
+{ Params }
+procedure GetParams(Strings: TStrings); overload;
+function GetParams(const Separator: string = ' '): string; overload;
+
+function ParamNum(const S: string): Integer;
+function ParamPrefixNum(const Prefix: string): Integer;
+function Param(const S: string): Boolean;
+function ParamPrefix(const Prefix: string): Boolean;
+
+function Switch(const Switch: string; const PrefixChars: TCharSet = SwitchChars;
+ IgnoreCase: Boolean = True): Boolean;
+function GetParam(const Prefix: string = ''; const Default: string = ''): string;
+
+{ Dirs & UserName}
+function GetMyDir(FullPath: Boolean = False): string;
+function WinDir: string;
+function SysDir: string;
+function UserName: string;
+
+{ Strings & Chars}
+function FirstChar(const S: string): Char;
+function LastChar(const S: string): Char;
+
+function CharPos(C: Char; const S: string; Offset: Integer = 1): Integer; overload;
+function CharPos(C: TCharSet; const S: string; Offset: Integer = 1): Integer; overload;
+function CharPosR(C: Char; const S: string; Offset: Integer = -1): Integer;
+function PosEx(const SubStr, S: string; Offset: Integer = 1): Integer;
+function PosExText(const SubStr, S: string; Offset: Integer = 1): Integer;
+function PosExAnsiText(const SubStr, S: string; Offset: Integer = 1): Integer;
+
+function UntilChar(const S: string; Brake: Char): string; overload;
+function UntilChar(const S: string; Brake: TCharSet): string; overload;
+function UntilLastChar(const S: string; Brake: Char;
+ IgnoreNoBrake: Boolean = True): string;
+
+function FromChar(const S: string; Brake: Char): string; overload;
+function FromChar(const S: string; Brake: TCharSet): string; overload;
+function FromLastChar(const S: string; Brake: Char;
+ IgnoreNoBrake: Boolean = False): string;
+
+function BetweenChars(const S: string; Start, Finish: Char;
+ Inclusive: Boolean = False): string;
+
+function UntilStr(const S: string; Brake: string): string;
+function FromStr(const S: string; Brake: string): string;
+
+function StringWrap(const S: string; Width: Integer; const LineEnd: string = EOL): string;
+
+{ Splitting & Combining }
+function Split(const S, Separator: string; IgnoreMultiSep: Boolean = True;
+ MinCount: Integer = 0): TStrA; overload;
+procedure Split(const S, Separator: string; Strings: TStrings;
+ IgnoreMultiSep: Boolean = True); overload;
+function Split(const S: string; Separators: TCharSet;
+ IgnoreMultiSep: Boolean = True; MinCount: Integer = 0): TStrA; overload;
+
+procedure TileStr(const S: string; BrakeStart: Integer; BrakeEnd: Integer;
+ out Left, Right: string);
+
+function Join(Strings: TStrings; Separator: string = ' '): string; overload;
+function Join(StrA: TStrA; Separator: string = ' '): string; overload;
+
+function MulStr(const S: string; Count: Integer): string;
+
+{ Aligning strings }
+function AlignR(const S: string; Width: Integer; Filler: Char = ' '): string;
+function MaxStr(const S: string; MaxLen: Integer): string;
+
+{ Stringing }
+function TrimAll(const S: string): string;
+
+function ControlChar(C: Char): Boolean;
+function FriendlyChar(C: Char): Char;
+
+function FriendlyStr(const S: string): string; overload;
+function FriendlyStr(a: TByteA): string; overload;
+
+function Quote(const S: string; Quoter: Char = '"'): string;
+function UnQuote(const S: string): string;
+function DeQuote(const S: string): string;
+
+function StrNumerus(const Value: Integer; const Singular, Plural: string;
+ const Zero: string = '0'): string;
+
+function MakeStr(const Items: array of const; Separator: string = ''): string;
+procedure ShowText(const Items: array of const; Separator: string = '');
+
+{ Delete }
+function DeleteChars(const S: string; C: Char): string; overload;
+function DeleteChars(const S: string; C: TCharSet): string; overload;
+function ExtractChars(const S: string; C: TCharSet): string;
+
+{ Find }
+function CharCount(const S: string; C: Char): Integer;
+
+function CharIn(const S: string; C: Char): Boolean; overload;
+function CharIn(const S: string; C: TCharSet): Boolean; overload;
+
+function StrAtPos(const S: string; Pos: Integer; const Str: string): Boolean;
+function StrAtBegin(const S, Str: string): Boolean;
+function StrIn(const S, SubStr: string): Boolean; overload;
+function StrIn(A: TStrA; const S: string): Boolean; overload;
+function StrIn(SL: TStrings; const S: string): Boolean; overload;
+function StrIndex(A: TStrA; const S: string): Integer; overload;
+function StrIndex(SL: TStrings; const S: string): Integer; overload;
+
+function TextAtPos(const S: string; Pos: Integer; const Text: string): Boolean;
+function TextAtBegin(const S, Text: string): Boolean;
+function TextIn(const S, Text: string): Boolean; overload;
+function TextIn(A: TStrA; const Text: string): Boolean; overload;
+function TextIn(SL: TStrings; const Text: string): Boolean; overload;
+function TextIndex(A: TStrA; const Text: string): Integer; overload;
+function TextIndex(SL: TStrings; const Text: string): Integer; overload;
+
+{ Replace }
+function ReplaceChars(const S: string; Old, New: Char): string; overload;
+function ReplaceChars(const S: string; Old: TCharSet; New: Char): string; overload;
+
+function Replace(const S, Old, New: string): string;
+
+{ TStrings }
+function SLOfFile(const FileName: string): TStringList;
+function ContainsEmptyLines(SL: TStrings): Boolean;
+procedure DeleteEmptyLines(SL: TStrings);
+procedure DeleteCommentLines(SL: TStrings; const CommentSign: string = '//');
+procedure WriteSL(Strings: TStrings; const Prefix: string = '';
+ const Suffix: string = '');
+
+function FindLine(SL: TStrings; const S: string): Integer;
+
+procedure QuickSortSL(SL: TStringList);
+
+{ TStrA }
+function IncStrA(StrA: TStrA): Integer;
+
+{ TByteA }
+function StrOfByteA(a: TByteA): string;
+function ByteAOfStr(const S: string): TByteA;
+function ByteAOfInt(i: Integer): TByteA;
+function IntOfByteA(A: TByteA): Integer;
+function ByteAOfHex(const Hex: string): TByteA;
+
+function SameByteA(const A, B: TByteA): Boolean;
+function Reverse(a: TByteA): TByteA;
+function SaveByteA(Data: TByteA; const FileName: string; Overwrite: Boolean = True): Boolean;
+function LoadByteA(const FileName: string): TByteA;
+
+function Endian(i: Integer): Integer;
+
+{ Files }
+function SizeOfFile(const FileName: string): Integer;
+function FileEx(const FileName: string; AllowFolders: Boolean = False): Boolean;
+function LWPSolve(const Dir: string): string;
+function LWPSlash(const Dir: string): string;
+
+function ExtractDrive(const FileName: string): string;
+function ExtractPath(const FileName: string): string;
+function ExtractPrefix(const FileName: string): string;
+function ExtractSuffix(const FileName: string): string;
+
+function IsValidFileName(const FileName: string): Boolean;
+function MakeValidFileName(FileName: string; const Default: string = 'File'): string;
+
+{ Converting }
+function IsValidInteger(const S: string): Boolean;
+function IsValidCardinal(const S: string): Boolean;
+
+function StrOfBool(flag: Boolean; const TrueStr: string = 'True';
+ const FalseStr: string = 'False'): string;
+function StrOfInt(i: Integer): string;
+function CardOfStr(const S: string): Cardinal;
+
+function HexOrd(Hex: Char): THex;
+function ByteOfHex(Hex: THexByteStr): Byte;
+
+function DecOfHex(const Hex: string): string;
+function HexOfByte(b: Byte): THexByteStr;
+function HexOfCard(i: Cardinal): string; overload;
+function HexOfCard(i: Cardinal; Digits: Integer): string; overload;
+
+function PascalHexArray(a: TByteA; Name: string): string;
+
+function HexOfByteA(a: TByteA; Blocks: Integer = 1;
+ const Splitter: string = ' '): string;
+function BinOfByteA(a: TByteA; Blocks: Integer = 4;
+ const Splitter: string = ' '): string;
+
+function CardOfHex(Hex: string): Cardinal;
+function IntOfBin(Bin: string): Cardinal;
+
+function BinOfIntFill(n: cardinal; MinCount: Integer = 8): string;
+function BinOfInt(n: cardinal): string;
+
+function BaseNOfInt(I: Cardinal; B: TBaseN): string;
+function IntOfBaseN(V: string; B: TBaseN): Cardinal;
+
+{ Ranges }
+function KeepIn(i, Bottom, Top: Variant): Variant;
+function InRange(Value, Bottom, Top: Variant): Boolean;
+function InStrictRange(Value, Bottom, Top: Variant): Boolean;
+function Min(const A, B: Integer): Integer; overload;
+function Min(const A: TIntA): Integer; overload;
+function Max(const A, B: Integer): Integer; overload;
+function Max(const A: TIntA): Integer; overload;
+
+const
+ RangesSeparator = ',';
+ RangeInnerSeparator = '-';
+ RangeInfinite = '*';
+ RangeSpecialChars = [RangesSeparator, RangeInnerSeparator, RangeInfinite];
+
+function RangesOfStr(const S: string): TRanges;
+function InRanges(Ranges: TRanges; TestValue: Cardinal): Boolean;
+
+function Success(Res: Integer; ResultOnSuccess: Integer = ERROR_SUCCESS): Boolean;
+function Failure(Res: Integer; ResultOnSuccess: Integer = ERROR_SUCCESS): Boolean;
+
+function ExpandString(const S: string): string;
+
+{ Files }
+procedure DeleteFiles(const Mask: string; ScanSubDirs: Boolean = True;
+ Attributes: Integer = faFindEveryFile);
+procedure FileNew(const FileName: string);
+function DateTimeOfFileTime(const FileTime: TFileTime): TDateTime;
+
+{ FileNames }
+function GetFileNew(FileName: string; NoFloppyDrives: Boolean = True): string;
+
+{ Finding Files }
+function FindAll(Strings: TStrings; const Mask: string;
+ ScanSubDirs: Boolean = True; Attributes: Integer = faFindEveryFile;
+ FileReturn: TFileNameFunc = nil): Boolean;
+function FindAllFirst(const Mask: string; ScanSubDirs: Boolean = True;
+ Attributes: Integer = faFindEveryFile): string;
+
+function FullOSInfo: string;
+function Win32PlatformStr: string;
+function Win9x: Boolean;
+function WinNT: Boolean;
+function Win2000: Boolean;
+function WinXP: Boolean;
+
+var
+ MyDir: string = '';
+ LastSuccessRes: Integer = 0;
+
+{ Backward compatibility }
+{$IFNDEF VER130}
+function SameText(const S1, S2: string): Boolean;
+{$ENDIF}
+
+implementation
+{$IFNDEF VER140}
+uses FileCtrl;
+{$ENDIF}
+
+{$IFNDEF VER130}
+{ Returns True when CompareText reports S1 and S2 as equal (CompareText
+  ignores case), False otherwise. }
+function SameText(const S1, S2: string): Boolean;
+var
+  Order: Integer;
+begin
+  Order := CompareText(S1, S2);
+  Result := Order = 0;
+end;
+{$ENDIF}
+
+{ Appends one line of Text to the report file MyDir + ReportName + '.rep'.
+  The file is created when it does not exist yet, otherwise the line is
+  added at its end. I/O errors while opening or writing propagate to the
+  caller. }
+procedure Report(const ReportName, Text: string);
+var
+  F: TextFile;
+  FileName: string;
+begin
+  FileName := MyDir + ReportName + '.rep';
+  Assign(F, FileName);
+  // Open the file before entering the protected block: when opening fails
+  // the file was never opened, and closing it in "finally" would raise a
+  // second I/O error that hides the real cause.
+  if FileExists(FileName) then
+    Append(F)
+  else
+    Rewrite(F);
+  try
+    WriteLn(F, Text);
+  finally
+    Close(F);
+  end;
+end;
+
+{ Deletes the report file MyDir + ReportName + '.rep'.
+  The result of the delete call is not evaluated. }
+procedure ClearReport(const ReportName: string);
+begin
+  DeleteFile(MyDir + ReportName + '.rep');
+end;
+
+{ Builds a line with Format(Fmt, Args) and passes it to Report under the
+  given report name. }
+procedure ReportFmt(const ReportName, Fmt: string; const Args: array of const);
+var
+  Line: string;
+begin
+  Line := Format(Fmt, Args);
+  Report(ReportName, Line);
+end;
+
+{ Clears Strings and fills it with the parameters parsed from the string
+  returned by GetCommandLine. The first parsed token is discarded
+  (presumably the program name - confirm); every following token is added
+  as one entry. }
+procedure GetParams(Strings: TStrings);
+var
+ P: PChar;
+ Param: string;
+
+ { Reads the next parameter starting at P into Param and advances P past it.
+   Returns False when no further non-empty parameter could be read. }
+ function GetParamStr(var P: PChar; var Param: string): Boolean;
+ var
+ Quoted: Boolean;
+ begin
+ Param := '';
+
+ // repeat so that tokens which end up empty (e.g. a lone "") are skipped
+ repeat
+ // skip leading characters up to and including space (control chars, blanks)
+ while (P[0] <> #0) and (P[0] <= ' ') do
+ Inc(P);
+
+ Quoted := False;
+ while P[0] <> #0 do begin
+ // a double quote toggles quoted mode and is not copied into Param
+ if P[0] = '"' then begin
+ Quoted := not Quoted;
+ Inc(P);
+ Continue; end;
+ // outside quotes a blank/control character ends the parameter
+ if (P[0] <= ' ') and not Quoted then
+ Break;
+ Param := Param + P[0];
+ Inc(P);
+ end;
+ until (Param <> '') or (P[0] = #0);
+
+ Result := Param <> '';
+ end;
+
+begin
+ Strings.Clear;
+ P := GetCommandLine;
+ // read and drop the first token; its result is intentionally ignored
+ GetParamStr(P, Param);
+ while GetParamStr(P, Param) do
+ Strings.Add(Param);
+end;
+
+{ Returns all command line parameters (as collected by the TStrings
+  overload of GetParams) joined into one string, with Separator placed
+  between consecutive parameters. }
+function GetParams(const Separator: string = ' '): string;
+var
+  SL: TStringList;
+begin
+  SL := TStringList.Create;
+  try
+    GetParams(SL);
+    Result := Join(SL, Separator);
+  finally
+    // release the list even when collecting or joining raises
+    SL.Free;
+  end;
+end;
+
+{ Reports whether one of the program's parameters is the given switch.
+  A parameter is considered only when its first character is in
+  PrefixChars; the remainder of the parameter is then compared with Switch,
+  either exactly or - when IgnoreCase is set - via AnsiCompareText.
+  The original author notes this as the counterpart of
+  SysUtils.FindCmdLineSwitch. }
+function Switch(const Switch: string; const PrefixChars: TCharSet = SwitchChars;
+  IgnoreCase: Boolean = True): Boolean;
+var
+  Index: Integer;
+  Candidate: string;
+begin
+  Result := False;
+
+  for Index := 1 to ParamCount do begin
+    Candidate := ParamStr(Index);
+    // a parameter should never be empty, but checking first is safer
+    // because of the [1] access
+    if (Candidate = '') or not (Candidate[1] in PrefixChars) then
+      Continue;
+
+    // drop the prefix character, keep the switch name
+    Delete(Candidate, 1, 1);
+    if Candidate = Switch then
+      Result := True
+    else if IgnoreCase then
+      Result := AnsiCompareText(Candidate, Switch) = 0;
+
+    if Result then
+      Exit;
+  end;
+end;
+
+function ParamNum(const S: string): Integer;
+// Returns the index (1-based) of the first command-line parameter equal to S
+// ignoring case, or 0 if there is none.
+var
+  Idx: Integer;
+begin
+  Result := 0;
+  for Idx := 1 to ParamCount do
+    if AnsiCompareText(ParamStr(Idx), S) = 0 then begin
+      Result := Idx;
+      Break;
+    end;
+end;
+
+function ParamPrefixNum(const Prefix: string): Integer;
+// Returns the index (1-based) of the first command-line parameter that starts
+// with Prefix ignoring case, or 0 if there is none.
+var
+  Idx, PrefixLen: Integer;
+begin
+  Result := 0;
+  PrefixLen := Length(Prefix);
+  for Idx := 1 to ParamCount do
+    if AnsiCompareText(Copy(ParamStr(Idx), 1, PrefixLen), Prefix) = 0 then begin
+      Result := Idx;
+      Break;
+    end;
+end;
+
+function Param(const S: string): Boolean;
+// True when ParamNum finds S among the command-line parameters.
+begin
+ Result := ParamNum(S) > 0;
+end;
+
+function ParamPrefix(const Prefix: string): Boolean;
+// True when ParamPrefixNum finds a command-line parameter starting with Prefix.
+begin
+ Result := ParamPrefixNum(Prefix) > 0;
+end;
+
+function GetParam(const Prefix: string = ''; const Default: string = ''): string;
+// With an empty Prefix: returns ParamStr(1).
+// Otherwise: returns the text following Prefix in the first parameter that
+// starts with Prefix, or Default when no such parameter exists.
+var
+  Idx: Integer;
+begin
+  if Prefix = '' then
+    Result := ParamStr(1)
+  else begin
+    Result := Default;
+    Idx := ParamPrefixNum(Prefix);
+    if Idx > 0 then
+      Result := Copy(ParamStr(Idx), Length(Prefix) + 1, MaxInt);
+  end;
+end;
+
+function GetMyDir(FullPath: Boolean = False): string;
+// Returns the path part (via ExtractPath) of the current module's file name.
+// When FullPath is set the file name is first passed through GetFileNew.
+var
+  Buffer: array[0..260] of Char;
+  Len: Integer;
+begin
+  Len := GetModuleFileName(0, Buffer, SizeOf(Buffer));
+  SetString(Result, Buffer, Len);
+  if FullPath then
+    Result := GetFileNew(Result);
+  Result := ExtractPath(Result);
+end;
+
+function WinDir: string;
+// Returns the Windows directory followed by a backslash.
+// If GetWindowsDirectory fails (or the path does not fit the buffer) the
+// fallback '\' is returned instead of reading an uninitialised buffer.
+var
+  Buf: array[0..MAX_PATH] of Char;
+  Len: Cardinal;
+begin
+  Result := '\';
+  Len := GetWindowsDirectory(Buf, MAX_PATH);
+  // 0 = failure; a value >= MAX_PATH means the buffer was too small
+  if (Len > 0) and (Len < MAX_PATH) then begin
+    SetString(Result, Buf, Len);
+    Result := Result + '\';
+  end;
+end;
+
+function SysDir: string;
+// Returns the Windows system directory followed by a backslash.
+// If GetSystemDirectory fails (or the path does not fit the buffer) the
+// fallback '\' is returned instead of reading an uninitialised buffer.
+var
+  Buf: array[0..MAX_PATH] of Char;
+  Len: Cardinal;
+begin
+  Result := '\';
+  Len := GetSystemDirectory(Buf, MAX_PATH);
+  // 0 = failure; a value >= MAX_PATH means the buffer was too small
+  if (Len > 0) and (Len < MAX_PATH) then begin
+    SetString(Result, Buf, Len);
+    Result := Result + '\';
+  end;
+end;
+
+function UserName: string;
+// Returns the name reported by GetUserName, or '' when the call fails.
+var
+  Buf: array[0..MAX_PATH] of Char;
+  Len: Cardinal;
+begin
+  Result := '';
+  Len := MAX_PATH;
+  // only read the buffer when the API reports success; otherwise its
+  // contents are undefined
+  if GetUserName(Buf, Len) then
+    Result := Buf;
+end;
+
+function FirstChar(const S: string): Char;
+// First character of S, or #0 for an empty string.
+begin
+  Result := #0;
+  if S <> '' then
+    Result := S[1];
+end;
+
+function LastChar(const S: string): Char;
+// Last character of S, or #0 for an empty string.
+var
+  Len: Integer;
+begin
+  Result := #0;
+  Len := Length(S);
+  if Len > 0 then
+    Result := S[Len];
+end;
+
+function CharPos(C: Char; const S: string; Offset: Integer = 1): Integer;
+// Returns the position (1-based) of the first occurrence of C in S at or
+// after Offset, or 0 if there is none.
+// An Offset below 1 is treated as 1 (same rule as PosEx) so that the search
+// never indexes in front of the string.
+var
+  MaxPosToSearch: Integer;
+begin
+  if Offset < 1 then
+    Result := 1
+  else
+    Result := Offset;
+  MaxPosToSearch := Length(S);
+
+  while Result <= MaxPosToSearch do begin
+    if S[Result] = C then
+      Exit;
+    Inc(Result);
+  end;
+
+  Result := 0;
+end;
+
+function CharPos(C: TCharSet; const S: string; Offset: Integer = 1): Integer;
+// Returns the position (1-based) of the first character of S at or after
+// Offset that is a member of C, or 0 if there is none.
+// An Offset below 1 is treated as 1 (same rule as PosEx) so that the search
+// never indexes in front of the string.
+var
+  MaxPosToSearch: Integer;
+begin
+  if Offset < 1 then
+    Result := 1
+  else
+    Result := Offset;
+  MaxPosToSearch := Length(S);
+
+  while Result <= MaxPosToSearch do begin
+    if S[Result] in C then
+      Exit;
+    Inc(Result);
+  end;
+
+  Result := 0;
+end;
+
+function CharPosR(C: Char; const S: string; Offset: Integer = -1): Integer;
+// Searches S backwards for C and returns its position (1-based), or 0 if not
+// found.
+// Offset >= 0 is the position to start at (clamped to Length(S)).
+// Offset < 0 counts from the end: -1 starts at the last character, -2 at the
+// one before it, and so on.
+begin
+  if Offset < 0 then
+    // Offset is negative, so it must be ADDED: -1 -> Length(S).
+    // (Subtracting it made every negative offset start at the last char.)
+    Result := Length(S) + 1 + Offset
+  else
+    Result := Offset;
+  if Result > Length(S) then
+    Result := Length(S);
+
+  while Result > 0 do begin
+    if S[Result] = C then
+      Exit;
+    Dec(Result);
+  end;
+
+  // not found, or the start position lay in front of the string
+  Result := 0;
+end;
+
+function PosEx(const SubStr, S: string; Offset: Integer = 1): Integer;
+// Returns the position (1-based) of the first occurrence of SubStr in S at
+// or after Offset (values below 1 count as 1). Returns 0 when SubStr is
+// empty or not found.
+var
+  LastStart, SubLen, Matched: Integer;
+begin
+  Result := 0;
+  SubLen := Length(SubStr);
+  if SubLen = 0 then
+    Exit;
+
+  if Offset < 1 then
+    Offset := 1;
+  LastStart := Length(S) - SubLen + 1;
+
+  Result := Offset;
+  while Result <= LastStart do begin
+    // count how many leading characters of SubStr match at this position
+    Matched := 0;
+    while (Matched < SubLen) and (S[Result + Matched] = SubStr[Matched + 1]) do
+      Inc(Matched);
+    if Matched = SubLen then
+      Exit;
+    Inc(Result);
+  end;
+
+  Result := 0;
+end;
+
+function PosExText(const SubStr, S: string; Offset: Integer = 1): Integer;
+// Case-insensitive substring search: characters are compared after UpCase.
+// Returns the position (1-based) of the first match at or after Offset
+// (values below 1 count as 1), or 0 when SubStr is empty or not found.
+var
+  LastStart, SubLen, Matched: Integer;
+begin
+  Result := 0;
+  SubLen := Length(SubStr);
+  if SubLen = 0 then
+    Exit;
+
+  if Offset < 1 then
+    Offset := 1;
+  LastStart := Length(S) - SubLen + 1;
+
+  Result := Offset;
+  while Result <= LastStart do begin
+    Matched := 0;
+    while (Matched < SubLen)
+      and (UpCase(S[Result + Matched]) = UpCase(SubStr[Matched + 1])) do
+      Inc(Matched);
+    if Matched = SubLen then
+      Exit;
+    Inc(Result);
+  end;
+
+  Result := 0;
+end;
+
+function PosExAnsiText(const SubStr, S: string; Offset: Integer = 1): Integer;
+// Case-insensitive substring search where characters are compared after
+// lower-casing them through the Windows CharLower function.
+// Returns the position (1-based) of the first match at or after Offset
+// (values below 1 count as 1), or 0 when SubStr is empty or not found.
+var
+  LastStart, SubLen, Matched: Integer;
+
+  function SameChar(a, b: Char): Boolean;
+  begin
+    // CharLower receives the character value itself (cast to PChar), not a
+    // pointer to a string, and hands back the converted value the same way
+    Result := CharLower(PChar(a)) = CharLower(PChar(b));
+  end;
+
+begin
+  Result := 0;
+  SubLen := Length(SubStr);
+  if SubLen = 0 then
+    Exit;
+
+  if Offset < 1 then
+    Offset := 1;
+  LastStart := Length(S) - SubLen + 1;
+
+  Result := Offset;
+  while Result <= LastStart do begin
+    Matched := 0;
+    while (Matched < SubLen)
+      and SameChar(S[Result + Matched], SubStr[Matched + 1]) do
+      Inc(Matched);
+    if Matched = SubLen then
+      Exit;
+    Inc(Result);
+  end;
+
+  Result := 0;
+end;
+
+function UntilChar(const S: string; Brake: Char): string;
+// Returns the part of S in front of the first Brake character,
+// or all of S when Brake does not occur.
+var
+  Cut: Integer;
+begin
+  Result := S;
+  Cut := CharPos(Brake, S);
+  if Cut > 0 then
+    SetLength(Result, Cut - 1);
+end;
+
+function UntilChar(const S: string; Brake: TCharSet): string;
+// Returns the part of S in front of the first character contained in Brake,
+// or all of S when no such character occurs.
+var
+  Cut: Integer;
+begin
+  Result := S;
+  Cut := CharPos(Brake, S);
+  if Cut > 0 then
+    SetLength(Result, Cut - 1);
+end;
+
+function UntilLastChar(const S: string; Brake: Char;
+  IgnoreNoBrake: Boolean = True): string;
+// Returns the part of S in front of the last Brake character. When Brake
+// does not occur the result is S if IgnoreNoBrake is set, otherwise ''.
+var
+  Cut: Integer;
+begin
+  Cut := CharPosR(Brake, S);
+  if Cut > 0 then
+    Result := Copy(S, 1, Cut - 1)
+  else if IgnoreNoBrake then
+    Result := S
+  else
+    Result := '';
+end;
+
+function FromChar(const S: string; Brake: Char): string;
+// Returns the part of S behind the first Brake character,
+// or '' when Brake does not occur.
+var
+  Cut: Integer;
+begin
+  Cut := CharPos(Brake, S);
+  if Cut > 0 then
+    Result := Copy(S, Cut + 1, MaxInt)
+  else
+    Result := '';
+end;
+
+function FromChar(const S: string; Brake: TCharSet): string;
+// Returns the part of S behind the first character contained in Brake,
+// or '' when no such character occurs.
+var
+  Cut: Integer;
+begin
+  Cut := CharPos(Brake, S);
+  if Cut > 0 then
+    Result := Copy(S, Cut + 1, MaxInt)
+  else
+    Result := '';
+end;
+
+function FromLastChar(const S: string; Brake: Char;
+  IgnoreNoBrake: Boolean = False): string;
+// Returns the part of S behind the last Brake character. When Brake does
+// not occur the result is S if IgnoreNoBrake is set, otherwise ''.
+var
+  Cut: Integer;
+begin
+  Cut := CharPosR(Brake, S);
+  if Cut > 0 then
+    Result := Copy(S, Cut + 1, MaxInt)
+  else if IgnoreNoBrake then
+    Result := S
+  else
+    Result := '';
+end;
+
+function BetweenChars(const S: string; Start, Finish: Char;
+  Inclusive: Boolean = False): string;
+// Returns the text between the first Start character and the first Finish
+// character that follows it. With Inclusive the two delimiters are part of
+// the result. Returns '' if either delimiter is missing.
+var
+  First, Last: Integer;
+begin
+  Result := '';
+
+  First := CharPos(Start, S);
+  if First > 0 then begin
+    Last := CharPos(Finish, S, First + 1);
+    if Last > 0 then
+      if Inclusive then
+        Result := Copy(S, First, Last - First + 1)
+      else
+        Result := Copy(S, First + 1, Last - First - 1);
+  end;
+end;
+
+function UntilStr(const S: string; Brake: string): string;
+// Returns the part of S in front of the first occurrence of Brake,
+// or all of S when Brake does not occur. Single-character delimiters are
+// delegated to UntilChar.
+var
+  Cut: Integer;
+begin
+  if Length(Brake) = 1 then
+    Result := UntilChar(S, Brake[1])
+  else begin
+    Result := S;
+    Cut := PosEx(Brake, S);
+    if Cut > 0 then
+      SetLength(Result, Cut - 1);
+  end;
+end;
+
+function FromStr(const S: string; Brake: string): string;
+// Returns the part of S behind the first occurrence of Brake, or '' when
+// Brake does not occur. Single-character delimiters are delegated to FromChar.
+var
+  Hit: Integer;
+begin
+  if Length(Brake) = 1 then
+    Result := FromChar(S, Brake[1])
+  else begin
+    Hit := PosEx(Brake, S);
+    if Hit > 0 then
+      Result := Copy(S, Hit + Length(Brake), MaxInt)
+    else
+      Result := '';
+  end;
+end;
+
+function StringWrap(const S: string; Width: Integer; const LineEnd: string = EOL): string;
+// Cuts S into pieces of Width characters and joins them with LineEnd.
+// Returns '' for an empty S or a Width below 1.
+var
+  Start, Len: Integer;
+begin
+  Result := '';
+  if (S = '') or (Width < 1) then
+    Exit;
+
+  Len := Length(S);
+  Start := 1;
+  repeat
+    // every piece except the first is preceded by a line end
+    if Start > 1 then
+      Result := Result + LineEnd;
+    Result := Result + Copy(S, Start, Width);
+    Inc(Start, Width);
+  until Start > Len;
+end;
+
+function Split(const S, Separator: string; IgnoreMultiSep: Boolean = True;
+  MinCount: Integer = 0): TStrA;
+// Splits S at every occurrence of Separator and returns the pieces.
+// IgnoreMultiSep drops empty pieces (e.g. between adjacent separators).
+// The result is extended to at least MinCount entries.
+var
+  Start, Hit, SepLen: Integer;
+
+  procedure Add(const S: string);
+  begin
+    if IgnoreMultiSep and (S = '') then
+      Exit;
+    SetLength(Result, Length(Result) + 1);
+    Result[High(Result)] := S;
+  end;
+
+begin
+  // Clear the result before anything reads it: a dynamic-array function
+  // result is not guaranteed to start out empty, so Length(Result) must not
+  // be inspected first (the empty-S path used to do exactly that).
+  Result := nil;
+
+  if S <> '' then begin
+    SepLen := Length(Separator);
+    Start := 1;
+    Hit := PosEx(Separator, S);
+    while Hit > 0 do begin
+      Add(Copy(S, Start, Hit - Start));
+      Start := Hit + SepLen;
+      Hit := PosEx(Separator, S, Start);
+    end;
+    Add(Copy(S, Start, Length(S) - Start + 1));
+  end;
+
+  if Length(Result) < MinCount then
+    SetLength(Result, MinCount);
+end;
+
+procedure Split(const S, Separator: string; Strings: TStrings;
+  IgnoreMultiSep: Boolean = True);
+// Splits S at every occurrence of Separator and appends the pieces to
+// Strings (existing entries are kept). IgnoreMultiSep drops empty pieces.
+var
+  Start, Hit, SepLen: Integer;
+
+  procedure Add(const S: string);
+  begin
+    if IgnoreMultiSep and (S = '') then
+      Exit;
+    Strings.Add(S);
+  end;
+
+begin
+  if S = '' then
+    Exit;
+
+  Strings.BeginUpdate;
+  try
+    SepLen := Length(Separator);
+    Start := 1;
+    Hit := PosEx(Separator, S);
+    while Hit > 0 do begin
+      Add(Copy(S, Start, Hit - Start));
+      Start := Hit + SepLen;
+      Hit := PosEx(Separator, S, Start);
+    end;
+    Add(Copy(S, Start, Length(S) - Start + 1));
+  finally
+    // always balance BeginUpdate, even if Strings.Add raises
+    Strings.EndUpdate;
+  end;
+end;
+
+function Split(const S: string; Separators: TCharSet;
+  IgnoreMultiSep: Boolean = True; MinCount: Integer = 0): TStrA;
+// Splits S at every character contained in Separators and returns the
+// pieces. IgnoreMultiSep drops empty pieces (e.g. between adjacent
+// separators). The result is extended to at least MinCount entries.
+var
+  Start, Hit: Integer;
+
+  procedure Add(const S: string);
+  begin
+    if IgnoreMultiSep and (S = '') then
+      Exit;
+    SetLength(Result, Length(Result) + 1);
+    Result[High(Result)] := S;
+  end;
+
+begin
+  // Clear the result before anything reads it: a dynamic-array function
+  // result is not guaranteed to start out empty, so Length(Result) must not
+  // be inspected first (the empty-S path used to do exactly that).
+  Result := nil;
+
+  if S <> '' then begin
+    Start := 1;
+    Hit := CharPos(Separators, S);
+    while Hit > 0 do begin
+      Add(Copy(S, Start, Hit - Start));
+      Start := Hit + 1;
+      Hit := CharPos(Separators, S, Start);
+    end;
+    Add(Copy(S, Start, Length(S) - Start + 1));
+  end;
+
+  if Length(Result) < MinCount then
+    SetLength(Result, MinCount);
+end;
+
+procedure TileStr(const S: string; BrakeStart: Integer; BrakeEnd: Integer;
+ out Left, Right: string);
+// Cuts S into two parts around the span BrakeStart..BrakeEnd (1-based):
+// Left receives everything in front of BrakeStart, Right everything behind
+// BrakeEnd. The span itself is dropped.
+begin
+ Left := Copy(S, 1, BrakeStart-1);
+ Right := Copy(S, BrakeEnd + 1, MaxInt);
+end;
+
+function Join(Strings: TStrings; Separator: string = ' '): string;
+// Concatenates all entries of Strings, placing Separator between them.
+var
+  Idx: Integer;
+begin
+  Result := '';
+  for Idx := 0 to Strings.Count - 1 do begin
+    if Idx > 0 then
+      Result := Result + Separator;
+    Result := Result + Strings[Idx];
+  end;
+end;
+
+function Join(StrA: TStrA; Separator: string = ' '): string; overload;
+// Concatenates all elements of StrA, placing Separator between them.
+var
+  Idx: Integer;
+begin
+  Result := '';
+  for Idx := 0 to High(StrA) do begin
+    if Idx > 0 then
+      Result := Result + Separator;
+    Result := Result + StrA[Idx];
+  end;
+end;
+
+function MulStr(const S: string; Count: Integer): string;
+// Returns S repeated Count times. A Count of zero or less, or an empty S,
+// yields ''.
+var
+  P: PChar;
+  Len, i: Integer;
+begin
+  Result := '';
+  Len := Length(S);
+  // Callers pass computed counts that may be negative (padding widths), so
+  // guard explicitly instead of relying on SetLength with a negative length.
+  if (Count <= 0) or (Len = 0) then
+    Exit;
+
+  SetLength(Result, Len * Count);
+
+  P := Pointer(Result);
+  for i := 1 to Count do begin
+    Move(Pointer(S)^, P^, Len);
+    Inc(P, Len);
+  end;
+end;
+
+function AlignR(const S: string; Width: Integer; Filler: Char = ' '): string;
+// Right-aligns S by putting Width - Length(S) Filler characters in front of
+// it (the padding is built with MulStr).
+begin
+ Result := MulStr(Filler, Width - Length(S)) + S;
+end;
+
+function MaxStr(const S: string; MaxLen: Integer): string;
+// Limits S to MaxLen characters. A longer S is cut and ends in '...'.
+// When MaxLen is too small to hold the ellipsis (below 3) S is simply
+// truncated, so the result never exceeds MaxLen.
+begin
+  if Length(S) <= MaxLen then
+    Result := S
+  else if MaxLen < 3 then
+    Result := Copy(S, 1, MaxLen)
+  else
+    Result := Copy(S, 1, MaxLen - 3) + '...';
+end;
+
+function TrimAll(const S: string): string;
+// Returns S with every character up to and including #32 (blanks and
+// control characters) removed, wherever it occurs.
+var
+  i: Integer;
+begin
+  // A string function result is not guaranteed to start out empty, so it
+  // has to be cleared before characters are appended to it.
+  Result := '';
+  for i := 1 to Length(S) do
+    if S[i] > #32 then
+      Result := Result + S[i];
+end;
+
+function ControlChar(C: Char): Boolean;
+// True when C is a member of the StrangeChars set.
+begin
+ Result := C in StrangeChars;
+end;
+
+function FriendlyChar(C: Char): Char;
+// Maps characters to a displayable stand-in:
+// #0 -> '.', #1..#31 -> '?', #255 -> '#'; everything else is returned as is.
+begin
+  if C = #0 then
+    Result := '.'
+  else if C <= #31 then
+    Result := '?'
+  else if C = #255 then
+    Result := '#'
+  else
+    Result := C;
+end;
+
+function FriendlyStr(const S: string): string;
+// Returns a copy of S in which every character went through FriendlyChar.
+var
+  Idx: Integer;
+begin
+  Result := S;
+  for Idx := 1 to Length(Result) do
+    Result[Idx] := FriendlyChar(Result[Idx]);
+end;
+
+function FriendlyStr(a: TByteA): string;
+// Builds a string with one character per byte of a, each byte converted
+// with FriendlyChar.
+var
+  Idx, Count: Integer;
+begin
+  Count := Length(a);
+  SetLength(Result, Count);
+  for Idx := 1 to Count do
+    Result[Idx] := FriendlyChar(Char(a[Idx - 1]));
+end;
+
+function Quote(const S: string; Quoter: Char = '"'): string;
+// Makes sure S starts and ends with Quoter: the character is added at
+// whichever end does not already carry it.
+begin
+  if FirstChar(S) = Quoter then
+    Result := S
+  else
+    Result := Quoter + S;
+
+  if LastChar(S) <> Quoter then
+    Result := Result + Quoter;
+end;
+
+function DeQuote(const S: string): string;
+// Strips the first and the last character of S without looking at them.
+// Strings of two characters or fewer yield ''.
+var
+  Len: Integer;
+begin
+  Len := Length(S);
+  if Len > 2 then
+    Result := Copy(S, 2, Len - 2)
+  else
+    Result := '';
+end;
+
+function UnQuote(const S: string): string;
+// If S starts with a character from QuoteChars or from #0..#32, that
+// character is removed, together with the last character when it is the
+// same one. Any other S is returned unchanged.
+var
+  First, Last: Integer;
+begin
+  First := 1;
+  Last := Length(S);
+
+  if (S <> '') and (S[1] in ([#0..#32] + QuoteChars)) then begin
+    First := 2;
+    if S[Last] = S[1] then
+      Dec(Last);
+  end;
+
+  Result := Copy(S, First, Last - First + 1);
+end;
+
+function StrNumerus(const Value: Integer; const Singular, Plural: string;
+  const Zero: string = '0'): string;
+// Builds "<number> <noun>": Singular is used for 1 and -1, Plural for
+// everything else. For 0 the text Zero replaces the number.
+begin
+  if Value = 0 then
+    Result := Zero + ' ' + Plural
+  else if Abs(Value) = 1 then
+    Result := IntToStr(Value) + ' ' + Singular
+  else
+    Result := IntToStr(Value) + ' ' + Plural;
+end;
+
+function MakeStr(const Items: array of const; Separator: string = ''): string;
+// Converts every element of Items to text, according to its variant-record
+// type, and concatenates the texts with Separator between them.
+// Elements of a type not listed in the case statement are skipped.
+const
+ BoolStrings: array[Boolean] of string = ('False', 'True');
+
+var
+ i: Integer;
+
+ // Text for a pointer value: '[nil]' or the address as a decimal number in brackets.
+ function StrOfP(P: Pointer): string;
+ begin
+ if P = nil then
+ Result := '[nil]'
+ else
+ Result := '[' + IntToStr(Cardinal(P)) + ']';
+ end;
+
+ // Appends S plus a trailing Separator; the last Separator is cut off at the end.
+ procedure Add(const S: string);
+ begin
+ Result := Result + s + Separator;
+ end;
+
+begin
+ Result := '';
+ for i := 0 to High(Items) do
+ with Items[i] do
+ case VType of
+ vtString: Add(VString^);
+ vtInteger: Add(IntToStr(VInteger));
+ vtBoolean: Add(BoolStrings[VBoolean]);
+ vtChar: Add(VChar);
+ vtPChar: Add(VPChar);
+ vtExtended: Add(FloatToStr(VExtended^));
+ // components are shown by Name, other objects by class name
+ // NOTE(review): a nil VObject reaches VObject.ClassName - confirm callers never pass nil
+ vtObject: if VObject is TComponent then
+ Add(TComponent(VObject).Name)
+ else
+ Add(VObject.ClassName);
+ vtClass: Add(VClass.ClassName);
+ vtAnsiString: Add(string(VAnsiString));
+ vtCurrency: Add(CurrToStr(VCurrency^));
+ vtInt64: Add(IntToStr(VInt64^));
+ vtVariant: Add(string(VVariant^));
+
+ vtWideChar: Add(VWideChar);
+ vtPWideChar: Add(VPWideChar);
+ vtInterface: Add(StrOfP(VInterface));
+ vtPointer: Add(StrOfP(VPointer));
+ vtWideString: Add(WideString(VWideString));
+ end;
+ // remove the Separator that Add put behind the last element
+ if Result <> '' then
+ SetLength(result, Length(Result) - Length(Separator));
+end;
+
+procedure ShowText(const Items: array of const; Separator: string = '');
+// Shows the text built by MakeStr(Items, Separator) in an 'Info' message box
+// with an OK button.
+var
+  Text: string;
+begin
+  Text := MakeStr(Items, Separator);
+
+  // Message box flags are bit masks and have to be combined with "or";
+  // "and" would wipe out every flag that is not part of both operands.
+  MessageBox(0, PChar(Text), 'Info', MB_OK or MB_APPLMODAL);
+end;
+
+function DeleteChars(const S: string; C: Char): string;
+// Returns S without any occurrence of the character C.
+var
+  Src, Kept: Integer;
+begin
+  SetLength(Result, Length(S));
+  Kept := 0;
+  for Src := 1 to Length(S) do
+    if S[Src] <> C then begin
+      Inc(Kept);
+      Result[Kept] := S[Src];
+    end;
+  SetLength(Result, Kept);
+end;
+
+function DeleteChars(const S: string; C: TCharSet): string;
+// Returns S without the characters that are members of C.
+var
+  Src, Kept: Integer;
+begin
+  SetLength(Result, Length(S));
+  Kept := 0;
+  for Src := 1 to Length(S) do
+    if not (S[Src] in C) then begin
+      Inc(Kept);
+      Result[Kept] := S[Src];
+    end;
+  SetLength(Result, Kept);
+end;
+
+function ExtractChars(const S: string; C: TCharSet): string;
+// Returns only those characters of S that are members of C, in order.
+var
+  Src, Kept: Integer;
+begin
+  SetLength(Result, Length(S));
+  Kept := 0;
+  for Src := 1 to Length(S) do
+    if S[Src] in C then begin
+      Inc(Kept);
+      Result[Kept] := S[Src];
+    end;
+  SetLength(Result, Kept);
+end;
+
+function CharCount(const S: string; C: Char): Integer;
+// Counts how often the character C occurs in S.
+var
+  Idx: Integer;
+begin
+  Result := 0;
+  for Idx := Length(S) downto 1 do
+    if S[Idx] = C then
+      Result := Result + 1;
+end;
+
+function StrAtPos(const S: string; Pos: Integer; const Str: string): Boolean;
+// True when the non-empty Str occurs in S exactly at position Pos
+// (case-sensitive). An empty Str never matches.
+begin
+  if Str = '' then
+    Result := False
+  else
+    Result := Copy(S, Pos, Length(Str)) = Str;
+end;
+
+function TextAtPos(const S: string; Pos: Integer; const Text: string): Boolean;
+// True when the non-empty Text occurs in S exactly at position Pos,
+// compared with SameText (ignoring case). An empty Text never matches.
+begin
+  if Text = '' then
+    Result := False
+  else
+    Result := SameText(Text, Copy(S, Pos, Length(Text)));
+end;
+
+function StrAtBegin(const S, Str: string): Boolean;
+// True when S starts with Str (StrAtPos at position 1).
+begin
+ Result := StrAtPos(S, 1, Str);
+end;
+
+function TextAtBegin(const S, Text: string): Boolean;
+// True when S starts with Text (TextAtPos at position 1).
+begin
+ Result := TextAtPos(S, 1, Text);
+end;
+
+function CharIn(const S: string; C: Char): Boolean;
+// True when the character C occurs somewhere in S.
+var
+  Idx: Integer;
+begin
+  Result := False;
+  for Idx := 1 to Length(S) do
+    if S[Idx] = C then begin
+      Result := True;
+      Break;
+    end;
+end;
+
+function CharIn(const S: string; C: TCharSet): Boolean;
+// True when at least one character of S is a member of C.
+var
+  Idx: Integer;
+begin
+  Result := False;
+  for Idx := 1 to Length(S) do
+    if S[Idx] in C then begin
+      Result := True;
+      Break;
+    end;
+end;
+
+function StrIn(const S, SubStr: string): Boolean;
+// True when PosEx finds SubStr somewhere in S (case-sensitive).
+begin
+ Result := PosEx(SubStr, S) > 0;
+end;
+
+function StrIn(SL: TStrings; const S: string): Boolean;
+// True when some entry of SL equals S exactly.
+var
+  Idx: Integer;
+begin
+  Result := False;
+  Idx := 0;
+  while (not Result) and (Idx < SL.Count) do begin
+    Result := SL[Idx] = S;
+    Inc(Idx);
+  end;
+end;
+
+function StrIn(A: TStrA; const S: string): Boolean;
+// True when some element of A equals S exactly.
+var
+  Idx: Integer;
+begin
+  Result := False;
+  Idx := Low(A);
+  while (not Result) and (Idx <= High(A)) do begin
+    Result := A[Idx] = S;
+    Inc(Idx);
+  end;
+end;
+
+function TextIn(const S, Text: string): Boolean;
+// True when PosExText finds Text somewhere in S (ignoring case).
+begin
+ Result := PosExText(Text, S) > 0;
+end;
+
+function TextIn(SL: TStrings; const Text: string): Boolean;
+// True when some entry of SL equals Text according to SameText.
+var
+  Idx: Integer;
+begin
+  Result := False;
+  Idx := 0;
+  while (not Result) and (Idx < SL.Count) do begin
+    Result := SameText(Text, SL[Idx]);
+    Inc(Idx);
+  end;
+end;
+
+function TextIn(A: TStrA; const Text: string): Boolean;
+// True when some element of A equals Text according to SameText.
+var
+  Idx: Integer;
+begin
+  Result := False;
+  Idx := Low(A);
+  while (not Result) and (Idx <= High(A)) do begin
+    Result := SameText(Text, A[Idx]);
+    Inc(Idx);
+  end;
+end;
+
+function StrIndex(SL: TStrings; const S: string): Integer;
+// Index of the first entry of SL that equals S exactly, or -1.
+var
+  Idx: Integer;
+begin
+  Result := -1;
+  for Idx := 0 to SL.Count - 1 do
+    if SL[Idx] = S then begin
+      Result := Idx;
+      Break;
+    end;
+end;
+
+function StrIndex(A: TStrA; const S: string): Integer;
+// Index of the first element of A that equals S exactly, or -1.
+var
+  Idx: Integer;
+begin
+  Result := -1;
+  for Idx := Low(A) to High(A) do
+    if A[Idx] = S then begin
+      Result := Idx;
+      Break;
+    end;
+end;
+
+function TextIndex(SL: TStrings; const Text: string): Integer;
+// Index of the first entry of SL that equals Text according to SameText, or -1.
+var
+  Idx: Integer;
+begin
+  Result := -1;
+  for Idx := 0 to SL.Count - 1 do
+    if SameText(Text, SL[Idx]) then begin
+      Result := Idx;
+      Break;
+    end;
+end;
+
+function TextIndex(A: TStrA; const Text: string): Integer;
+// Index of the first element of A that equals Text according to SameText, or -1.
+var
+  Idx: Integer;
+begin
+  Result := -1;
+  for Idx := Low(A) to High(A) do
+    if SameText(Text, A[Idx]) then begin
+      Result := Idx;
+      Break;
+    end;
+end;
+
+function ReplaceChars(const S: string; Old, New: Char): string;
+// Returns a copy of S in which every Old character is replaced by New.
+var
+  Idx: Integer;
+begin
+  SetLength(Result, Length(S));
+  for Idx := 1 to Length(S) do
+    if S[Idx] = Old then
+      Result[Idx] := New
+    else
+      Result[Idx] := S[Idx];
+end;
+
+function ReplaceChars(const S: string; Old: TCharSet; New: Char): string;
+// Returns a copy of S in which every character that is a member of Old is
+// replaced by New.
+var
+  Idx: Integer;
+begin
+  SetLength(Result, Length(S));
+  for Idx := 1 to Length(S) do
+    if S[Idx] in Old then
+      Result[Idx] := New
+    else
+      Result[Idx] := S[Idx];
+end;
+
+function Replace(const S, Old, New: string): string;
+// Returns S with every occurrence of Old replaced by New (case-sensitive,
+// scanning left to right; replaced text is not scanned again).
+var
+  Start, Hit: Integer;
+begin
+  Result := '';
+  Start := 1;
+  Hit := PosEx(Old, S, Start);
+  while Hit > 0 do begin
+    // copy the text in front of the match, then the replacement
+    Result := Result + Copy(S, Start, Hit - Start) + New;
+    Start := Hit + Length(Old);
+    Hit := PosEx(Old, S, Start);
+  end;
+  // append whatever follows the last match
+  Result := Result + Copy(S, Start, Length(S) - Start + 1);
+end;
+
+function SLOfFile(const FileName: string): TStringList;
+// Creates a string list and fills it from FileName if that file exists;
+// otherwise the list stays empty. The caller owns (and frees) the result.
+begin
+  Result := TStringList.Create;
+  try
+    if FileExists(FileName) then
+      Result.LoadFromFile(FileName);
+  except
+    // the caller never receives the list when loading fails, so release it
+    // here before passing the exception on
+    Result.Free;
+    raise;
+  end;
+end;
+
+function ContainsEmptyLines(SL: TStrings): Boolean;
+// True when SL holds at least one empty string.
+begin
+ Result := StrIn(SL, '');
+end;
+
+procedure DeleteEmptyLines(SL: TStrings);
+// Removes every empty string from SL.
+var
+  Idx: Integer;
+begin
+  // walk backwards so that deleting does not shift the entries still to visit
+  for Idx := SL.Count - 1 downto 0 do
+    if SL[Idx] = '' then
+      SL.Delete(Idx);
+end;
+
+procedure DeleteCommentLines(SL: TStrings; const CommentSign: string = '//');
+// Removes from SL every empty line and every line whose first non-blank
+// text is CommentSign.
+var
+  Idx: Integer;
+begin
+  // walk backwards so that deleting does not shift the entries still to visit
+  for Idx := SL.Count - 1 downto 0 do
+    if (SL[Idx] = '') or StrAtBegin(TrimLeft(SL[Idx]), CommentSign) then
+      SL.Delete(Idx);
+end;
+
+function FindLine(SL: TStrings; const S: string): Integer;
+// Index of the first line of SL that starts with S (ignoring case, see
+// TextAtBegin), or -1 if there is none.
+var
+  Idx: Integer;
+begin
+  Result := -1;
+  for Idx := 0 to SL.Count - 1 do
+    if TextAtBegin(SL[Idx], S) then begin
+      Result := Idx;
+      Break;
+    end;
+end;
+
+procedure QuickSortSL(SL: TStringList);
+// Sorts the entries of SL in place, in ascending order of the string "<"
+// operator, using a recursive quicksort. Only the strings are swapped.
+
+ // Sorts the index range l..r.
+ procedure Sort(l, r: Integer);
+ var
+ i,j: Integer;
+ z,x: string;
+ begin
+ i := l;
+ j := r;
+ // pivot: the middle element of the range
+ x := SL[(j + i) div 2];
+ repeat
+ while SL[i] < x do Inc(i);
+ while SL[j] > x do Dec(j);
+ if i <= j then begin
+ // swap the two entries that are on the wrong side of the pivot
+ z := SL[i];
+ SL[i] := SL[j];
+ SL[j] := z;
+ Inc(i); Dec(j);
+ end;
+ until i > j;
+ if j > l then Sort(l, j);
+ if i < r then Sort(i, r);
+ end;
+
+begin
+ if SL.Count > 0 then
+ Sort(0, SL.Count-1);
+end;
+
+function IncStrA(StrA: TStrA): Integer;
+// Grows StrA by one element and returns the index of the new last element.
+// NOTE(review): StrA is passed by value, so SetLength resizes a local copy
+// only and the caller's array appears to stay unchanged; the parameter
+// probably needs to be "var" (together with its interface declaration) -
+// confirm against the callers.
+begin
+ SetLength(StrA, Length(StrA) + 1);
+ Result := High(StrA);
+end;
+
+function StrOfByteA(a: TByteA): string;
+// Returns a string holding the bytes of a, one character per byte.
+begin
+  // Copy the bytes into a real string instead of hard-casting the array:
+  // the cast depends on the internal layout of dynamic arrays and strings
+  // and produces a string without the terminating #0.
+  SetString(Result, PChar(Pointer(a)), Length(a));
+end;
+
+function ByteAOfStr(const S: string): TByteA;
+// Returns a byte array holding the characters of S, one byte per character.
+var
+  Len: Integer;
+begin
+  // Copy the characters into a real dynamic array instead of hard-casting
+  // the string: the cast depends on the internal layout of both types.
+  Len := Length(S);
+  SetLength(Result, Len);
+  if Len > 0 then
+    Move(Pointer(S)^, Result[0], Len);
+end;
+
+function ByteAOfInt(i: Integer): TByteA;
+// Returns the bytes of i in memory order as an array of SizeOf(Integer) bytes.
+begin
+  SetLength(Result, SizeOf(i));
+  Move(i, Result[0], SizeOf(i));
+end;
+
+function IntOfByteA(A: TByteA): Integer;
+// Builds an Integer from the first bytes of A in memory order. At most
+// SizeOf(Integer) bytes are used; missing bytes count as zero.
+var
+  Count: Integer;
+begin
+  Result := 0;
+  Count := Length(A);
+  if Count > SizeOf(Integer) then
+    Count := SizeOf(Integer);
+  if Count > 0 then
+    Move(A[0], Result, Count);
+end;
+
+function ByteAOfHex(const Hex: string): TByteA;
+// Converts a hexadecimal text into bytes. Characters outside
+// HexadecimalChars are ignored; every two remaining digits form one byte
+// and a trailing single digit is dropped.
+var
+  Idx: Integer;
+  Digits: string;
+begin
+  Digits := ExtractChars(Hex, HexadecimalChars);
+  SetLength(Result, Length(Digits) div 2);
+  for Idx := Low(Result) to High(Result) do
+    Result[Idx] := ByteOfHex(Copy(Digits, 2 * Idx + 1, 2));
+end;
+
+function SizeOfFile(const FileName: string): Integer;
+// Returns the size of the file in bytes, or 0 when it cannot be opened.
+var
+  F: file;
+begin
+  Result := 0;
+  AssignFile(F, FileName);
+  {$I-}Reset(F, 1);{$I+}
+  if IOResult <> 0 then
+    Exit;
+  Result := FileSize(F);
+  CloseFile(F);
+end;
+
+function FileEx(const FileName: string; AllowFolders: Boolean = False): Boolean;
+// True when FileName names an existing file. Folders count only when
+// AllowFolders is set. Empty names and names containing a character from
+// WildCards are never accepted.
+var
+  FindData: TWin32FindData;
+  Handle: THandle;
+begin
+  Result := False;
+  if (FileName = '') or CharIn(FileName, WildCards) then
+    Exit;
+
+  if AllowFolders and DirectoryExists(FileName) then begin
+    Result := True;
+    Exit; end;
+
+  Handle := FindFirstFile(PChar(FileName), FindData);
+  if Handle = INVALID_HANDLE_VALUE then
+    Exit;
+  // the search handle must be released again, otherwise every call leaks one
+  Windows.FindClose(Handle);
+
+  Result := AllowFolders
+    or ((FindData.dwFileAttributes and FILE_ATTRIBUTE_DIRECTORY) = 0);
+end;
+
+function LWPSolve(const Dir: string): string;
+// Returns Dir without a trailing backslash (only one is removed).
+var
+  Len: Integer;
+begin
+  Result := Dir;
+  Len := Length(Result);
+  if (Len > 0) and (Result[Len] = '\') then
+    SetLength(Result, Len - 1);
+end;
+
+function LWPSlash(const Dir: string): string;
+// Returns Dir with a trailing backslash, adding one if it is missing.
+// An empty Dir yields '\'.
+begin
+  Result := Dir;
+  if (Dir = '') or (Dir[Length(Dir)] <> '\') then
+    Result := Result + '\';
+end;
+
+function ExtractDrive(const FileName: string): string;
+// Returns the drive of FileName as upper-case 'X:\' when the second
+// character is a colon, otherwise ''.
+begin
+  if (Length(FileName) < 2) or (FileName[2] <> ':') then
+    Result := ''
+  else
+    Result := UpperCase(FileName[1] + ':\');
+end;
+
+function ExtractPath(const FileName: string): string;
+// Returns FileName up to and including its last backslash. A name without
+// any backslash is returned unchanged.
+var
+  Cut: Integer;
+begin
+  Result := FileName;
+  Cut := CharPosR('\', FileName);
+  if Cut > 0 then
+    SetLength(Result, Cut);
+end;
+
+function ExtractPrefix(const FileName: string): string;
+// Returns the file name (without path) up to, but not including, its last dot.
+begin
+ Result := UntilLastChar(ExtractFileName(FileName), '.');
+end;
+
+function ExtractSuffix(const FileName: string): string;
+// Returns the part of the file name (without path) behind its last dot.
+begin
+ Result := FromLastChar(ExtractFileName(FileName), '.');
+end;
+
+function SameByteA(const A, B: TByteA): Boolean;
+// True when A and B hold the same bytes: either both refer to the same
+// array (A = B compares the references) or they have equal length and
+// CompareMem reports equal content.
+begin
+ Result := (A = B) or ((Length(A) = Length(B)) and CompareMem(A, B, Length(A)));
+end;
+
+function Reverse(A: TByteA): TByteA;
+// Returns a new array holding the bytes of A in reverse order.
+var
+  Idx, Count: Integer;
+begin
+  Count := Length(A);
+  SetLength(Result, Count);
+
+  for Idx := 0 to Count - 1 do
+    Result[Idx] := A[Count - 1 - Idx];
+end;
+
+function Endian(i: Integer): Integer;
+// Returns i with the order of its four bytes reversed.
+type
+  EndianArray = packed array[0..3] of Byte;
+var
+  Src, Dst: EndianArray;
+  k: Integer;
+begin
+  Src := EndianArray(i);
+  for k := 0 to 3 do
+    Dst[k] := Src[3 - k];
+  Result := Integer(Dst);
+end;
+
+function SaveByteA(Data: TByteA; const FileName: string;
+  Overwrite: Boolean = True): Boolean;
+// Writes Data to FileName, replacing any previous content.
+// Returns False without writing when the file exists and Overwrite is not
+// set, or when the file cannot be created; True after a successful write.
+var
+  F: file;
+begin
+  Result := False;
+  if FileExists(FileName) and not Overwrite then
+    Exit;
+
+  AssignFile(F, FileName);
+  {$I-}Rewrite(F, 1);{$I+}
+  if IOResult <> 0 then
+    Exit;
+
+  try
+    if Length(Data) > 0 then
+      BlockWrite(F, Data[0], Length(Data));
+    Result := True;
+  finally
+    // close the file even when BlockWrite raises (e.g. disk full)
+    CloseFile(F);
+  end;
+end;
+
+function LoadByteA(const FileName: string): TByteA;
+// Returns the complete content of FileName as a byte array, or an empty
+// array when the file cannot be opened.
+var
+  F: file;
+begin
+  Result := nil;
+  AssignFile(F, FileName);
+  {$I-}Reset(F, 1);{$I+}
+  if IOResult <> 0 then
+    Exit;
+
+  try
+    SetLength(Result, FileSize(F));
+    if Length(Result) > 0 then
+      BlockRead(F, Result[0], Length(Result));
+  finally
+    // close the file even when reading raises
+    CloseFile(F);
+  end;
+end;
+
+function IsValidFileName(const FileName: string): Boolean;
+// True when FileName is not empty, contains no character from
+// FileNameEnemies and, after trimming, has at least one character other
+// than '.'.
+begin
+  Result := False;
+  if FileName = '' then
+    Exit;
+  if CharIn(FileName, FileNameEnemies) then
+    Exit;
+  Result := CharIn(Trim(FileName), AllChars - ['.']);
+end;
+
+function MakeValidFileName(FileName: string; const Default: string = 'File'): string;
+// Turns FileName into a usable file name: an empty name becomes Default,
+// characters from FileNameEnemies are replaced by '_', and a name that
+// (trimmed) consists of dots only is replaced by Default.
+var
+  Name: string;
+begin
+  Name := FileName;
+  if Name = '' then
+    Name := Default;
+
+  if CharIn(Name, FileNameEnemies) then
+    Result := ReplaceChars(Name, FileNameEnemies, '_')
+  else if CharIn(Trim(Name), AllChars - ['.']) then
+    Result := Name
+  else
+    Result := Default;
+end;
+
+function IsValidInteger(const S: string): Boolean;
+// True when S can be converted by StrToInt64Def and the value fits into an
+// Integer.
+var
+  Parsed: Int64;
+begin
+  // the default High(Int64) lies outside the Integer range, so text that
+  // cannot be converted fails the range test below
+  Parsed := StrToInt64Def(S, High(Int64));
+  Result := not ((Parsed < Low(Integer)) or (Parsed > High(Integer)));
+end;
+
+function IsValidCardinal(const S: string): Boolean;
+// True when S can be converted by StrToInt64Def and the value fits into a
+// Cardinal.
+var
+  Parsed: Int64;
+begin
+  // the default -1 lies outside the Cardinal range, so text that cannot be
+  // converted fails the range test below
+  Parsed := StrToInt64Def(S, -1);
+  Result := not ((Parsed < 0) or (Parsed > High(Cardinal)));
+end;
+
+function StrOfBool(flag: Boolean; const TrueStr: string = 'True';
+  const FalseStr: string = 'False'): string;
+// Returns TrueStr when flag is set and FalseStr otherwise.
+begin
+  Result := FalseStr;
+  if flag then
+    Result := TrueStr;
+end;
+
+function StrOfInt(i: Integer): string;
+// Returns the decimal text of i; a plain wrapper around IntToStr.
+// The block comment below is an earlier hand-written conversion, kept
+// disabled.
+begin
+{ if i = 0 then begin
+ Result := '0';
+ Exit end;
+
+ while i > 0 do begin
+ Result := Char(Byte('0') + (i mod 10)) + Result;
+ i := i div 10;
+ end;}
+ Result := IntToStr(i);
+end;
+
+function CardOfStr(const S: string): Cardinal;
+// Converts S to a Cardinal. Values above High(Cardinal) are clipped to
+// High(Cardinal); negative values and text that cannot be converted give 0.
+var
+  Parsed: Int64;
+begin
+  Parsed := StrToInt64Def(S, -1);
+  if Parsed < 0 then
+    Result := 0
+  else if Parsed > High(Cardinal) then
+    Result := High(Cardinal)
+  else
+    Result := Cardinal(Parsed);
+end;
+
+function HexOrd(Hex: Char): THex;
+// Returns the value of the hexadecimal digit Hex (upper or lower case);
+// any other character gives 0.
+begin
+  case Hex of
+    '0'..'9':
+      Result := Ord(Hex) - Ord('0');
+    'A'..'F':
+      Result := Ord(Hex) - Ord('A') + 10;
+    'a'..'f':
+      Result := Ord(Hex) - Ord('a') + 10;
+  else
+    Result := 0;
+  end;
+end;
+
+function ByteOfHex(Hex: THexByteStr): Byte;
+// Converts two hexadecimal digits into a byte: Hex[1] supplies the high
+// nibble and Hex[2] the low nibble.
+var
+  HighNibble, LowNibble: Byte;
+begin
+  HighNibble := HexOrd(Hex[1]);
+  LowNibble := HexOrd(Hex[2]);
+  Result := (HighNibble shl 4) + LowNibble;
+end;
+
+function DecOfHex(const Hex: string): string;
+// Converts hexadecimal text to its decimal text: CardOfHex followed by IntToStr.
+begin
+ Result := IntToStr(CardOfHex(Hex));
+end;
+
+function HexOfByte(b: Byte): THexByteStr;
+// Returns the two hexadecimal digits of b (looked up in HexChar), high
+// nibble first.
+var
+  HighNibble, LowNibble: Byte;
+begin
+  HighNibble := b shr 4;
+  LowNibble := b and $0F;
+  Result := HexChar[HighNibble] + HexChar[LowNibble];
+end;
+
+{function HexOfCard2(c: Cardinal): string;
+var
+ Data: array[0..(1 shl 4) - 1] of Char;
+ i: Integer;
+begin
+ for i := 0 to (1 shl 4) - 1 do
+ if i < 10 then
+ Data[i] := Char(Ord('0') + i)
+ else
+ Data[i] := Char(Ord('A') + i - 10);
+
+ Result := Data[(c and (((1 shl (1 shl 2)) - 1) shl (7 shl 2))) shr (7 shl 2)]
+ + Data[(c and (((1 shl (1 shl 2)) - 1) shl (6 shl 2))) shr (6 shl 2)]
+ + Data[(c and (((1 shl (1 shl 2)) - 1) shl (5 shl 2))) shr (5 shl 2)]
+ + Data[(c and (((1 shl (1 shl 2)) - 1) shl (4 shl 2))) shr (4 shl 2)]
+ + Data[(c and (((1 shl (1 shl 2)) - 1) shl (3 shl 2))) shr (3 shl 2)]
+ + Data[(c and (((1 shl (1 shl 2)) - 1) shl (2 shl 2))) shr (2 shl 2)]
+ + Data[(c and (((1 shl (1 shl 2)) - 1) shl (1 shl 2))) shr (1 shl 2)]
+ + Data[(c and (((1 shl (1 shl 2)) - 1) shl (0 shl 2))) shr (0 shl 2)];
+end; }
+
+function HexOfCard(i: Cardinal): string;
+// Returns the hexadecimal digits of i (looked up in HexChar) without
+// leading zeros. Zero yields '0', matching BinOfInt and BaseNOfInt, rather
+// than an empty string.
+var
+  a: Cardinal;
+begin
+  if i = 0 then begin
+    Result := '0';
+    Exit; end;
+
+  Result := '';
+  while i > 0 do begin
+    a := i and $F;
+    Result := HexChar[a] + Result;
+    i := i shr 4;
+  end;
+end;
+
+function HexOfCard(i: Cardinal; Digits: Integer): string;
+// Returns the hexadecimal digits of i (looked up in HexChar), padded on the
+// left with '0' up to Digits characters. Longer values are not cut.
+var
+  Nibble: Cardinal;
+  Missing: Integer;
+begin
+  Result := '';
+  while i <> 0 do begin
+    Nibble := i and $F;
+    Result := HexChar[Nibble] + Result;
+    i := i shr 4;
+  end;
+  Missing := Digits - Length(Result);
+  Result := MulStr('0', Missing) + Result;
+end;
+
+function PascalHexArray(a: TByteA; Name: string): string;
+// Renders the bytes of a as Pascal source text: a typed constant called
+// Name, declared as an array of Byte and initialised with $xx values.
+var
+ i, len: Integer;
+begin
+ Result := 'const' + EOL +
+ ' ' + Name + ': array[0..' + IntToStr(High(a)) + '] of Byte = (';
+
+ len := Length(a);
+ for i := 0 to len-1 do begin
+ // start a new source line every 19 values
+ if (i mod 19) = 0 then
+ Result := Result + EOL + ' ' + ' ';
+ Result := Result + '$' + HexOfByte(a[i]);
+ // comma after every value except the last
+ if i < len-1 then
+ Result := Result + ',';
+ end;
+ Result := Result + EOL + ' );';
+end;
+
+function HexOfByteA(a: TByteA; Blocks: Integer = 1;
+  const Splitter: string = ' '): string;
+// Returns the bytes of a as hexadecimal text, two digits per byte. When
+// Blocks is positive, Splitter is inserted after every Blocks bytes (but
+// not at the end); otherwise the digits are written without separators.
+var
+  Idx, Last: Integer;
+begin
+  Result := '';
+  Last := High(a);
+
+  for Idx := 0 to Last do begin
+    Result := Result + HexOfByte(a[Idx]);
+    if Blocks > 0 then
+      if Idx < Last then
+        if ((Idx + 1) mod Blocks) = 0 then
+          Result := Result + Splitter;
+  end;
+end;
+
+function BinOfByteA(a: TByteA; Blocks: Integer = 4;
+  const Splitter: string = ' '): string;
+// Returns the bytes of a as binary text, eight digits per byte with the
+// most significant bit first. When Blocks is positive, Splitter is inserted
+// after every Blocks bits (but not at the end); otherwise all bits are
+// written without separators.
+var
+  i, max: Integer;
+begin
+  Result := '';
+
+  // index of the last bit; -1 for an empty array, so the loop is skipped
+  max := 8 * Length(a) - 1;
+  for i := 0 to max do begin
+    // bit (7 - i mod 8) of byte (i div 8): most significant bit comes first
+    if ((a[i div 8] shr (7 - (i mod 8))) and 1) = 1 then
+      Result := Result + '1'
+    else
+      Result := Result + '0';
+
+    if Blocks > 0 then
+      if i < max then
+        if ((i + 1) mod Blocks) = 0 then
+          Result := Result + Splitter;
+  end;
+end;
+
+function CardOfHex(Hex: string): Cardinal;
+// Converts hexadecimal text to a number. Characters outside
+// HexadecimalChars are ignored and only the first eight remaining digits
+// are used.
+var
+  Idx: Integer;
+  Digits: string;
+begin
+  Digits := Copy(ExtractChars(Hex, HexadecimalChars), 1, 8);
+
+  Result := 0;
+  for Idx := 1 to Length(Digits) do
+    // shift the digits read so far one nibble up and append the next one
+    Result := (Result shl 4) or HexOrd(Digits[Idx]);
+end;
+
+function IntOfBin(Bin: string): Cardinal;
+// Converts binary text to a number. Characters outside BinaryChars are
+// ignored and only the first 32 remaining digits are used.
+var
+  i: Integer;
+begin
+  Result := 0;
+  Bin := Copy(ExtractChars(Bin, BinaryChars), 1, 32);
+
+  for i := 1 to Length(Bin) do begin
+    // Build the value by shifting the Cardinal itself. Computing the bit
+    // weight as "1 shl n" goes through a signed Integer, which overflows
+    // for the top bit of a 32-digit input.
+    Result := Result shl 1;
+    if Bin[i] = '1' then
+      Result := Result or 1;
+  end;
+end;
+
+function BinOfInt(n: Cardinal): string;
+// Returns the binary digits of n without leading zeros; zero yields '0'.
+begin
+  Result := '';
+  repeat
+    if Odd(n) then
+      Result := '1' + Result
+    else
+      Result := '0' + Result;
+    n := n shr 1;
+  until n = 0;
+end;
+
+function BinOfIntFill(n: Cardinal; MinCount: Integer = 8): string;
+// Returns the binary digits of n, padded on the left with '0' up to
+// MinCount characters. Longer values are not cut.
+var
+  Missing: Integer;
+begin
+  Result := '';
+  while n <> 0 do begin
+    if Odd(n) then
+      Result := '1' + Result
+    else
+      Result := '0' + Result;
+    n := n shr 1;
+  end;
+  // for n = 0 no digit was produced, so the padding supplies all MinCount zeros
+  Missing := MinCount - Length(Result);
+  Result := MulStr('0', Missing) + Result;
+end;
+
+function BaseNOfInt(I: Cardinal; B: TBaseN): string;
+// Returns the digits of I in base B, taken from BaseNChar. Zero, or a base
+// below 2, yields '0'.
+var
+  Digit: Integer;
+begin
+  if (B >= 2) and (I <> 0) then begin
+    Result := '';
+    repeat
+      Digit := I mod B;
+      Result := BaseNChar[Digit] + Result;
+      I := I div B;
+    until I = 0;
+  end else
+    Result := '0';
+end;
+
+function IntOfBaseN(V: string; B: TBaseN): Cardinal;
+// Converts the text V, read as a number in base B, to a Cardinal.
+// Digits are '0'..'9' followed by letters (case-insensitive, 'A' = 10).
+// Blanks and control characters are removed first (TrimAll). A character
+// that is not a valid digit of base B contributes nothing, but still
+// occupies its position.
+var
+  i: Integer;
+  F, Digit: Cardinal;
+  Ch: Char;
+begin
+  Result := 0;
+  V := TrimAll(V);
+  F := 1;
+  for i := Length(V) downto 1 do begin
+    Ch := UpCase(V[i]);
+    case Ch of
+      '0'..'9': Digit := Ord(Ch) - Ord('0');
+      'A'..'Z': Digit := Ord(Ch) - Ord('A') + 10;
+    else
+      // Not a digit at all. The raw character code must not be used as a
+      // digit value: codes such as '!' (33) are smaller than a large base
+      // and would otherwise be counted.
+      Digit := B;
+    end;
+    if Digit < B then
+      Result := Result + Digit * F;
+    F := F * B;
+  end;
+end;
+
+// Clamps i to the interval [Bottom, Top]. The upper bound is checked first,
+// so Top wins when the bounds are crossed.
+function KeepIn(i, Bottom, Top: Variant): Variant;
+begin
+  if i > Top then
+    Result := Top
+  else if i < Bottom then
+    Result := Bottom
+  else
+    Result := i;
+end;
+
+// True when Bottom <= Value <= Top (both bounds inclusive).
+function InRange(Value, Bottom, Top: Variant): Boolean;
+begin
+  if Value >= Bottom then
+    Result := Value <= Top
+  else
+    Result := False;
+end;
+
+// True when Bottom < Value < Top (both bounds exclusive).
+function InStrictRange(Value, Bottom, Top: Variant): Boolean;
+begin
+  Result := False;
+  if Value > Bottom then
+    Result := Value < Top;
+end;
+
+// Returns the smaller of A and B.
+function Min(const A, B: Integer): Integer;
+begin
+  Result := B;
+  if A < B then
+    Result := A;
+end;
+
+// Returns the smallest element of A, or 0 when A is empty.
+function Min(const A: TIntA): Integer;
+var
+  Idx: Integer;
+begin
+  if Length(A) = 0 then begin
+    Result := 0;
+    Exit;
+  end;
+
+  Result := A[0];
+  for Idx := High(A) downto 1 do
+    if Result > A[Idx] then
+      Result := A[Idx];
+end;
+
+// Returns the larger of A and B.
+function Max(const A, B: Integer): Integer;
+begin
+  Result := B;
+  if A > B then
+    Result := A;
+end;
+
+// Returns the largest element of A, or 0 when A is empty.
+function Max(const A: TIntA): Integer;
+var
+  Idx: Integer;
+begin
+  if Length(A) = 0 then begin
+    Result := 0;
+    Exit;
+  end;
+
+  Result := A[0];
+  for Idx := High(A) downto 1 do
+    if Result < A[Idx] then
+      Result := A[Idx];
+end;
+
+// Parses S into an array of ranges.
+// S is split at RangesSeparator. Each part is one of:
+//   - the text RangeInfinite: an unbounded range (Simple = False,
+//     Bottom = lowest and Top = highest value of the field type);
+//   - a single number: a simple entry (Simple = True, Value set);
+//   - two parts around RangeInnerSeparator: a Bottom..Top range in which
+//     either side may be RangeInfinite. Bounds given in reverse order are
+//     swapped.
+// Returns nil as soon as any number fails to parse.
+// Fixes: the success check no longer reads the for-loop counter after the
+// loop, whose value is undefined in Delphi; bounds are swapped through a
+// Cardinal temporary instead of an Integer.
+function RangesOfStr(const S: string): TRanges;
+var
+  SL: TStringList;
+  r, b, t: string;
+  i, p: Integer;
+  Tmp: Cardinal;
+  Valid: Boolean;
+
+  // Wraps Val: True when S is a complete, valid number.
+  function TryStrToCard(const S: string; out Value: Cardinal): Boolean;
+  var
+    E: Integer;
+  begin
+    Val(S, Value, E);
+    Result := E = 0;
+  end;
+
+begin
+  Result := nil;
+  SL := TStringList.Create;
+  try
+    Split(S, RangesSeparator, SL);
+    SetLength(Result, SL.Count);
+    Valid := True;
+    for i := 0 to SL.Count-1 do begin
+      r := SL[i];
+      with Result[i] do begin
+        p := CharPos(RangeInnerSeparator, r);
+        Simple := p = 0; // no inner separator found
+        if Simple then begin
+          if r = RangeInfinite then begin // lone infinite marker = unbounded range
+            Simple := False;
+            Bottom := Low(Bottom);
+            Top := High(Top);
+          end else
+            Valid := TryStrToCard(r, Value);
+        end else begin
+          TileStr(r, p, p, b, t);
+
+          if b = RangeInfinite then
+            Bottom := Low(Bottom)
+          else
+            Valid := TryStrToCard(b, Bottom);
+
+          if Valid then begin
+            if t = RangeInfinite then
+              Top := High(Top)
+            else
+              Valid := TryStrToCard(t, Top);
+          end;
+
+          if Valid and (Bottom > Top) then begin
+            Tmp := Bottom; Bottom := Top; Top := Tmp;
+          end;
+        end;
+      end;
+      if not Valid then
+        Break;
+    end;
+
+    if not Valid then
+      Result := nil;
+
+  finally
+    SL.Free;
+  end;
+end;
+
+// True when TestValue matches at least one entry of Ranges: equal to Value
+// for a simple entry, inside Bottom..Top (inclusive, via InRange) otherwise.
+function InRanges(Ranges: TRanges; TestValue: Cardinal): Boolean;
+var
+  Idx: Integer;
+begin
+  Result := False;
+  for Idx := 0 to High(Ranges) do begin
+    if Ranges[Idx].Simple then
+      Result := TestValue = Ranges[Idx].Value
+    else
+      Result := InRange(TestValue, Ranges[Idx].Bottom, Ranges[Idx].Top);
+    if Result then
+      Exit; // first hit decides
+  end;
+end;
+
+// Writes every entry of Strings to standard output, one per line, each
+// wrapped in Prefix and Suffix.
+procedure WriteSL(Strings: TStrings; const Prefix: string = '';
+  const Suffix: string = '');
+var
+  Line: Integer;
+begin
+  Line := 0;
+  while Line < Strings.Count do begin
+    WriteLn(Prefix + Strings[Line] + Suffix);
+    Inc(Line);
+  end;
+end;
+
+// Stores Res in LastSuccessRes and reports whether it equals
+// ResultOnSuccess.
+function Success(Res: Integer; ResultOnSuccess: Integer = ERROR_SUCCESS): Boolean;
+begin
+  LastSuccessRes := Res;
+  Result := Res = ResultOnSuccess;
+end;
+
+// Negation of Success; Res is recorded in LastSuccessRes by that call.
+function Failure(Res: Integer; ResultOnSuccess: Integer = ERROR_SUCCESS): Boolean;
+begin
+  if Success(Res, ResultOnSuccess) then
+    Result := False
+  else
+    Result := True;
+end;
+
+// Expands environment-variable references in S using the Win32 API
+// ExpandEnvironmentStrings and returns the expanded text.
+// Returns '' when the API reports failure or when the expansion no longer
+// fits the size reported by the first call.
+// Fixes: the temporary buffer is released in a finally block, and the
+// result of the second API call is checked before the buffer is used.
+function ExpandString(const S: string): string;
+var
+  Len, Written: Integer;
+  P, Res: PChar;
+begin
+  Result := '';
+  P := PChar(S);
+  // first call without a buffer only asks for the required size
+  Len := ExpandEnvironmentStrings(P, nil, 0);
+  if Len = 0 then
+    Exit;
+
+  GetMem(Res, Len * SizeOf(Char));
+  try
+    Written := ExpandEnvironmentStrings(P, Res, Len);
+    if (Written <> 0) and (Written <= Len) then
+      Result := Res;
+  finally
+    FreeMem(Res);
+  end;
+end;
+
+// Collects the names of all files matching Mask into Strings.
+// Strings is cleared first. Mask is split into a directory part (ExtractPath)
+// and a file-name pattern (ExtractFileName). Every match is stored as
+// directory + name; when FileReturn is assigned, its result for that path is
+// stored instead, and empty results are skipped. With ScanSubDirs the same
+// pattern is applied to every subdirectory as well.
+// Returns True when Strings holds at least one entry afterwards.
+function FindAll(Strings: TStrings; const Mask: string;
+ ScanSubDirs: Boolean = True; Attributes: Integer = faFindEveryFile;
+ FileReturn: TFileNameFunc = nil): Boolean;
+var
+ Path, FileName: string;
+
+ // Scans one directory (Path ends with a backslash for the recursive calls),
+ // then optionally its subdirectories.
+ procedure ScanDir(const Path, FileName: string);
+ var
+ PSR: TSearchRec;
+ Res: Integer;
+
+ // Appends S to the result list unless it is empty.
+ procedure Add(const S: string);
+ begin
+ if S <> '' then
+ Strings.Add(S);
+ end;
+
+ begin
+ // pass 1: entries matching the pattern in this directory
+ Res := FindFirst(Path + FileName, Attributes, PSR);
+ while Success(Res, 0) do begin
+ if Assigned(FileReturn) then
+ Add(FileReturn(Path + PSR.Name))
+ else
+ Add(Path + PSR.Name);
+ Res := FindNext(PSR);
+ end;
+ FindClose(PSR);
+ if not ScanSubDirs then
+ Exit;
+
+ // pass 2: recurse into subdirectories; the attribute test filters out
+ // non-directory entries, '.' and '..' are skipped to avoid endless recursion
+ Res := FindFirst(Path + '*', faDirectory, PSR);
+ while Success(Res, 0) do begin
+ if (PSR.Attr and faDirectory > 0)
+ and (PSR.Name <> '.') and (PSR.Name <> '..') then
+ ScanDir(Path + PSR.Name + '\', FileName);
+ Res := FindNext(PSR);
+ end;
+ FindClose(PSR);
+ end;
+
+begin
+ Strings.Clear;
+ Path := ExtractPath(Mask);
+ FileName := ExtractFileName(Mask);
+ ScanDir(Path, FileName);
+ Result := Strings.Count > 0;
+end;
+
+// Returns the path of the first entry matching Mask, or '' when nothing
+// matches. The directory named in Mask is checked first; with ScanSubDirs
+// its subdirectories are then searched one after another until a match
+// is found.
+function FindAllFirst(const Mask: string; ScanSubDirs: Boolean = True;
+ Attributes: Integer = faFindEveryFile): string;
+var
+ Path, FileName: string;
+
+ // Searches one directory; returns True once a match has been stored in the
+ // result of the enclosing FindAllFirst.
+ function ScanDir(const Path, FileName: string): Boolean;
+ var
+ PSR: TSearchRec;
+ Res: Integer;
+ begin
+ Result := False;
+ if Success(FindFirst(Path + FileName, Attributes, PSR), 0) then begin
+ // assigns the result of the outer function, not of ScanDir
+ FindAllFirst := Path + PSR.Name;
+ Result := True;
+ FindClose(PSR);
+ Exit; end;
+ if not ScanSubDirs then
+ Exit;
+
+ Res := FindFirst(Path + '*', faDirectory, PSR);
+ // stop as soon as a nested call reports a match
+ while not Result and Success(Res, 0) do begin
+ if (PSR.Attr and faDirectory > 0)
+ and (PSR.Name <> '.') and (PSR.Name <> '..') then
+ Result := ScanDir(Path + PSR.Name + '\', FileName);
+ Res := FindNext(PSR);
+ end;
+ FindClose(PSR);
+ end;
+begin
+ Result := '';
+ Path := ExtractPath(Mask);
+ FileName := ExtractFileName(Mask);
+ ScanDir(Path, FileName);
+end;
+
+// Deletes every file matching Mask. Mask is split into a directory part
+// (ExtractPath) and a file-name pattern (ExtractFileName). With ScanSubDirs
+// the pattern is applied to all subdirectories too. Errors raised while
+// deleting are swallowed (best effort).
+procedure DeleteFiles(const Mask: string; ScanSubDirs: Boolean = True;
+ Attributes: Integer = faFindEveryFile);
+var
+ Path, FileName: string;
+
+ procedure ScanDir(const Path, FileName: string);
+ var
+ PSR: TSearchRec;
+ Res: Integer;
+
+ // Deletes the entry currently held in PSR, ignoring exceptions.
+ // NOTE(review): the FileName parameter is not used; the target is always
+ // Path + PSR.Name from the enclosing scope. Both call sites pass exactly
+ // that expression, so the outcome is the same today.
+ procedure TryDeleteFile(const FileName: string);
+ begin
+ try
+ DeleteFile(Path + PSR.Name);
+ except
+ end;
+ end;
+
+ begin
+ // pass 1: delete matching entries in this directory
+ Res := FindFirst(Path + FileName, Attributes, PSR);
+ while Success(Res, 0) do begin
+ TryDeleteFile(Path + PSR.Name);
+ Res := FindNext(PSR);
+ end;
+ FindClose(PSR);
+ if not ScanSubDirs then
+ Exit;
+
+ // pass 2: descend into each subdirectory ('.' and '..' excluded)
+ Res := FindFirst(Path + '*', faDirectory, PSR);
+ while Success(Res, 0) do begin
+ if (PSR.Attr and faDirectory > 0)
+ and (PSR.Name <> '.') and (PSR.Name <> '..') then begin
+ ScanDir(Path + PSR.Name + '\', FileName);
+ // NOTE(review): this passes a directory to DeleteFile - confirm whether
+ // removing the directory is intended and whether DeleteFile can do it.
+ TryDeleteFile(Path + PSR.Name);
+ end;
+ Res := FindNext(PSR);
+ end;
+ FindClose(PSR);
+ end;
+begin
+ Path := ExtractPath(Mask);
+ FileName := ExtractFileName(Mask);
+ ScanDir(Path, FileName);
+end;
+
+// Rebuilds FileName component by component from the names the file system
+// reports (TSearchRec.Name), starting at its drive part (ExtractDrive).
+// Returns '' when
+//   - the trimmed name is shorter than two characters,
+//   - the drive part does not exist as a directory,
+//   - NoFloppyDrives is set and the drive letter is 'A' or 'B',
+//   - a path component cannot be found, or
+//   - FileEx(Result, True) rejects the rebuilt path.
+// Fix: the search handle is now closed when the final component is found;
+// previously that path left the loop without calling FindClose.
+function GetFileNew(FileName: string; NoFloppyDrives: Boolean = True): string;
+var
+  Drive: string;
+  pf, pd, Len: Integer; // pf: start of current component, pd: next backslash
+  PSR: TSearchRec;
+begin
+  Result := '';
+  FileName := Trim(FileName);
+  if Length(FileName) < 2 then
+    Exit;
+
+  Drive := ExtractDrive(FileName);
+  if not DirectoryExists(Drive) then
+    Exit;
+
+  if NoFloppyDrives and (Drive[1] in ['A', 'B']) then
+    Exit;
+
+  Len := Length(FileName);
+  Result := Drive;
+  pf := Length(Drive) + 1;
+  while pf <= Len do begin
+    // copy separators through unchanged
+    if FileName[pf] = '\' then begin
+      Result := Result + '\';
+      Inc(pf);
+      Continue;
+    end;
+
+    pd := CharPos('\', FileName, pf);
+    if pd = 0 then begin
+      // last component: try it as a file first, then as a directory
+      if 0=FindFirst(Result + Copy(FileName, pf, MaxInt), faFindEveryFile, PSR) then begin
+        Result := Result + PSR.Name;
+        FindClose(PSR);
+        Break;
+      end else begin
+        FindClose(PSR);
+        if 0=FindFirst(Result + Copy(FileName, pf, MaxInt), faDirectory, PSR) then
+          Result := Result + PSR.Name + '\'
+        else
+          Result := '';
+        FindClose(PSR);
+        if Result = '' then
+          Break;
+        // NOTE(review): when the directory lookup above succeeds, control
+        // falls through to the lookup below with pd = 0 - confirm that this
+        // is intended.
+      end;
+    end;
+
+    // intermediate component: must be a directory
+    if 0=FindFirst(Result + Copy(FileName, pf, pd - pf), faDirectory, PSR) then
+      Result := Result + PSR.Name + '\'
+    else
+      Result := '';
+    FindClose(PSR);
+    if Result = '' then
+      Break;
+
+    pf := pd + 1;
+  end;
+
+  if (Result <> '') and not FileEx(Result, True) then
+    Result := '';
+end;
+
+// Converts a Win32 TFileTime to a TDateTime by way of the local file time
+// and the packed DOS date/time format.
+// Returns 0 when the DOS conversion fails, when the packed value is 0 or -1,
+// or when FileDateToDateTime raises an exception.
+function DateTimeOfFileTime(const FileTime: TFileTime): TDateTime;
+var
+  LocalTime: TFileTime;
+  DosStamp: Integer;
+begin
+  Result := 0;
+
+  FileTimeToLocalFileTime(FileTime, LocalTime);
+  if not FileTimeToDosDateTime(LocalTime, LongRec(DosStamp).Hi,
+    LongRec(DosStamp).Lo) then
+    Exit;
+
+  case DosStamp of
+    -1, 0: Exit; // values treated as "no date"
+  end;
+
+  try
+    Result := FileDateToDateTime(DosStamp);
+  except
+    // deliberately ignored: the result stays 0
+  end;
+end;
+
+// Creates the file FileName (via FileCreate) and closes it again right away.
+// Nothing is reported when the file cannot be created.
+// Fix: the handle is only closed when FileCreate succeeded; a failed call
+// returns -1, which must not be passed to FileClose.
+procedure FileNew(const FileName: string);
+var
+  Handle: Integer;
+begin
+  Handle := FileCreate(FileName);
+  if Handle <> -1 then
+    FileClose(Handle);
+end;
+
+// Returns the symbolic name of the running platform (Win32Platform).
+// Fix: a platform id outside the known range used to index past the array;
+// it is now reported as 'VER_PLATFORM_UNKNOWN (<id>)'.
+function Win32PlatformStr: string;
+const
+  PlatformStrings: array[VER_PLATFORM_WIN32s..VER_PLATFORM_WIN32_NT] of string =
+    ('VER_PLATFORM_WIN32s', 'VER_PLATFORM_WIN32_WINDOWS', 'VER_PLATFORM_WIN32_NT');
+begin
+  if (Win32Platform >= Low(PlatformStrings))
+    and (Win32Platform <= High(PlatformStrings)) then
+    Result := PlatformStrings[Win32Platform]
+  else
+    Result := Format('VER_PLATFORM_UNKNOWN (%d)', [Win32Platform]);
+end;
+
+// Returns a three-line description of the operating system: platform name,
+// version with build number, and the CSD string. Lines are joined with EOL.
+function FullOSInfo: string;
+var
+  Layout: string;
+begin
+  Layout := 'Platform: %s' + EOL;
+  Layout := Layout + 'Version: %d.%d Build %d' + EOL;
+  Layout := Layout + 'CSD: %s';
+
+  Result := Format(Layout, [Win32PlatformStr, Win32MajorVersion,
+    Win32MinorVersion, Win32BuildNumber, Win32CSDVersion]);
+end;
+
+// True when Win32Platform reports the VER_PLATFORM_WIN32_WINDOWS platform.
+function Win9x: Boolean;
+begin
+  case Win32Platform of
+    VER_PLATFORM_WIN32_WINDOWS: Result := True;
+  else
+    Result := False;
+  end;
+end;
+
+// True when Win32Platform reports the VER_PLATFORM_WIN32_NT platform.
+function WinNT: Boolean;
+begin
+  if Win32Platform = VER_PLATFORM_WIN32_NT then
+    Result := True
+  else
+    Result := False;
+end;
+
+// True when running on Windows 2000, i.e. the NT platform at version 5.0.
+// Fix: the check used major version 4, which is Windows NT 4.0; Windows 2000
+// reports 5.0.
+function Win2000: Boolean;
+begin
+  Result := (Win32Platform = VER_PLATFORM_WIN32_NT)
+    and (Win32MajorVersion = 5)
+    and (Win32MinorVersion = 0);
+end;
+
+// True when running on Windows XP or a later NT-based system, i.e. the NT
+// platform at version 5.1 or above.
+// Fix: the check was "major version >= 5" without a platform test, which
+// is also true on Windows 2000 (version 5.0).
+function WinXP: Boolean;
+begin
+  Result := (Win32Platform = VER_PLATFORM_WIN32_NT)
+    and ((Win32MajorVersion > 5)
+      or ((Win32MajorVersion = 5) and (Win32MinorVersion >= 1)));
+end;
+
+// Unit start-up code: stores the result of GetMyDir in MyDir once, when the
+// unit is initialised.
+initialization
+ MyDir := GetMyDir;
+
+end.
+
+unit FifoStream;
+
+interface
+
+uses Classes, windows, Dialogs;
+
+const
+ DefaultChunksize = 32768; // 32kb per chunk as default.
+
+type
+ PMemChunk = ^TMemChunk;
+ // One buffer of the FIFO.
+ TMemChunk = record
+ // number of bytes stored in Data so far (advanced by TFifo.Write)
+ Filled: Longword;
+ // number of bytes already consumed from the front (advanced by TFifo.Read)
+ Read: Longword;
+ // memory block of FChunksize bytes, allocated in TFifo.Write
+ Data: pointer;
+ end;
+
+ // Byte FIFO built from a list of fixed-size chunks. Write appends at the
+ // end, Read consumes from the front, PeekData copies without consuming.
+ // The three operations run inside a critical section.
+ TFifo = class
+ private
+ // list of PMemChunk, oldest chunk first
+ FBuffers: TList;
+ // size of every chunk; set to DefaultChunksize in Create
+ FChunksize: Longword;
+ FCritSect: TRTLCriticalSection;
+ // NOTE(review): not referenced by any method in this unit
+ FIsWinNT: boolean;
+ // cached byte count, refreshed at the end of Write and Read
+ FBytesInFifo: LongWord;
+ protected
+ // recomputes the byte count from the chunk list
+ function GetBytesInFifo: LongWord;
+ public
+ constructor Create;
+ destructor Destroy; override;
+ procedure Write(Data: pointer; Size: LongWord);
+ // ReqSize: in = bytes wanted, out = bytes actually copied
+ procedure Read(Buff: pointer; var ReqSize: LongWord);
+ procedure PeekData(Buff: pointer; var ReqSize: LongWord);
+ published
+ property BytesInFifo: LongWord read FBytesInFifo;
+ end;
+
+implementation
+
+// Creates an empty FIFO with the default chunk size and initialises the
+// critical section that guards the chunk list.
+constructor TFifo.Create;
+begin
+  inherited Create;
+  FBuffers := TList.Create;
+  FChunksize := DefaultChunksize; // chunk size is fixed for the object's lifetime
+  InitializeCriticalSection(FCritSect);
+end;
+
+// Releases every chunk still queued, the chunk list itself and finally the
+// critical section.
+destructor TFifo.Destroy;
+var
+  Chunk: PMemChunk;
+begin
+  EnterCriticalSection(FCritSect);
+  while FBuffers.Count > 0 do
+  begin
+    Chunk := PMemChunk(FBuffers.Last);
+    FreeMem(Chunk.Data);
+    Dispose(Chunk);
+    FBuffers.Delete(FBuffers.Count - 1);
+  end;
+  FBuffers.Free;
+  LeaveCriticalSection(FCritSect);
+
+  DeleteCriticalSection(FCritSect);
+  inherited Destroy;
+end;
+
+// Computes the number of unread bytes from the chunk list: every chunk but
+// the last counts as FChunkSize bytes, the last one contributes its Filled
+// count, and the bytes already read from the first chunk are subtracted.
+function TFifo.GetBytesInFifo: LongWord;
+var
+  LastIdx: Integer;
+begin
+  Result := 0;
+  LastIdx := FBuffers.Count - 1;
+  if LastIdx < 0 then
+    Exit; // no chunks, nothing stored
+
+  if LastIdx > 0 then
+    Inc(Result, LastIdx * FChunkSize);
+  Inc(Result, PMemChunk(FBuffers[LastIdx]).Filled);
+  Dec(Result, PMemChunk(FBuffers[0]).Read);
+end;
+
+// Appends Size bytes starting at Data to the end of the FIFO.
+// The last chunk is topped up first; further chunks of FChunksize bytes are
+// appended as needed. A nil Data pointer is ignored. FBytesInFifo is
+// refreshed before the lock is released.
+// Changes: the critical section is released in a finally block, so a
+// failing allocation can no longer leave it locked; the two copies of the
+// fill logic are merged into one loop, which removes the 'miscalculation'
+// self-check that could never trigger; the pointer-to-LongWord casts are
+// replaced by PAnsiChar arithmetic.
+procedure TFifo.Write(Data: pointer; Size: LongWord);
+var
+  Source: PAnsiChar;
+  Remaining, Count: LongWord;
+  Chunk: PMemChunk;
+begin
+  if Data = nil then
+    Exit; // nothing to copy from
+
+  EnterCriticalSection(FCritSect);
+  try
+    Source := PAnsiChar(Data);
+    Remaining := Size;
+    while Remaining > 0 do
+    begin
+      // use the last chunk while it has room, otherwise append a fresh one
+      Chunk := nil;
+      if FBuffers.Count > 0 then
+        Chunk := PMemChunk(FBuffers[FBuffers.Count - 1]);
+      if (Chunk = nil) or (Chunk.Filled >= FChunksize) then
+      begin
+        New(Chunk);
+        try
+          GetMem(Chunk.Data, FChunksize);
+        except
+          Dispose(Chunk); // do not leak the record when the buffer fails
+          raise;
+        end;
+        Chunk.Filled := 0;
+        Chunk.Read := 0;
+        FBuffers.Add(Chunk);
+      end;
+
+      // copy as much as fits into the free part of the chunk
+      Count := FChunksize - Chunk.Filled;
+      if Count > Remaining then
+        Count := Remaining;
+      Move(Source^, (PAnsiChar(Chunk.Data) + Chunk.Filled)^, Count);
+      Inc(Chunk.Filled, Count);
+      Inc(Source, Count);
+      Dec(Remaining, Count);
+    end;
+    FBytesInFifo := GetBytesInFifo;
+  finally
+    LeaveCriticalSection(FCritSect);
+  end;
+end;
+
+// Removes up to ReqSize bytes from the front of the FIFO and copies them to
+// Buff. On return ReqSize holds the number of bytes actually copied.
+// Chunks that are used up while reading are freed. A nil Buff is ignored
+// and leaves ReqSize untouched.
+// Fixes: the destination position now advances with every copy - before,
+// each chunk was copied to the start of Buff, so a read spanning several
+// chunks overwrote its own data; the critical section is released in a
+// finally block.
+procedure TFifo.Read(Buff: pointer; var ReqSize: LongWord);
+var
+  Copied, Avail, Count: LongWord;
+  Chunk: PMemChunk;
+begin
+  if Buff = nil then
+    Exit; // no destination buffer
+
+  Copied := 0;
+  EnterCriticalSection(FCritSect);
+  try
+    while FBuffers.Count > 0 do
+    begin
+      Chunk := PMemChunk(FBuffers[0]);
+      Avail := Chunk.Filled - Chunk.Read; // unread bytes in this chunk
+      if Copied + Avail < ReqSize then
+      begin
+        // the whole rest of the chunk fits: copy it and drop the chunk
+        Move((PAnsiChar(Chunk.Data) + Chunk.Read)^,
+          (PAnsiChar(Buff) + Copied)^, Avail);
+        Inc(Copied, Avail);
+        FreeMem(Chunk.Data);
+        Dispose(Chunk);
+        FBuffers.Delete(0);
+      end
+      else
+      begin
+        // the request ends inside this chunk: take what is still wanted
+        // and remember how far the chunk has been consumed
+        Count := ReqSize - Copied;
+        Move((PAnsiChar(Chunk.Data) + Chunk.Read)^,
+          (PAnsiChar(Buff) + Copied)^, Count);
+        Inc(Chunk.Read, Count);
+        Inc(Copied, Count);
+        Break;
+      end;
+    end;
+    FBytesInFifo := GetBytesInFifo;
+  finally
+    LeaveCriticalSection(FCritSect);
+  end;
+  ReqSize := Copied;
+end;
+
+// read Data from Stream without removing it from the Stream...
+
+// Copies up to ReqSize bytes from the front of the FIFO to Buff without
+// removing them. On return ReqSize holds the number of bytes copied.
+// A nil Buff is ignored and leaves ReqSize untouched.
+// Fixes: the destination position now advances with every copy - before,
+// each chunk was copied to the start of Buff; the critical section is
+// released in a finally block.
+procedure TFifo.PeekData(Buff: pointer; var ReqSize: LongWord);
+var
+  Copied, Avail, Count: LongWord;
+  Chunk: PMemChunk;
+  ChunkNr: Integer;
+begin
+  if Buff = nil then
+    Exit; // no destination buffer
+
+  Copied := 0;
+  EnterCriticalSection(FCritSect);
+  try
+    ChunkNr := 0;
+    while ChunkNr < FBuffers.Count do
+    begin
+      Chunk := PMemChunk(FBuffers[ChunkNr]);
+      Avail := Chunk.Filled - Chunk.Read; // unread bytes in this chunk
+      if Copied + Avail < ReqSize then
+      begin
+        // the whole rest of the chunk fits: copy it and go on
+        Move((PAnsiChar(Chunk.Data) + Chunk.Read)^,
+          (PAnsiChar(Buff) + Copied)^, Avail);
+        Inc(Copied, Avail);
+        Inc(ChunkNr);
+      end
+      else
+      begin
+        // the request ends inside this chunk
+        Count := ReqSize - Copied;
+        Move((PAnsiChar(Chunk.Data) + Chunk.Read)^,
+          (PAnsiChar(Buff) + Copied)^, Count);
+        Inc(Copied, Count);
+        Break;
+      end;
+    end;
+  finally
+    LeaveCriticalSection(FCritSect);
+  end;
+  ReqSize := Copied;
+end;
+
+end.
diff --git a/bench/example.dump b/bench/example.dump Binary files differnew file mode 100644 index 0000000..0f4a3fb --- /dev/null +++ b/bench/example.dump diff --git a/bench/example.plain b/bench/example.plain new file mode 100644 index 0000000..9e9aefc --- /dev/null +++ b/bench/example.plain @@ -0,0 +1,201 @@ +#include <string.h> +#include <stdlib.h> +#include <stdio.h> +#include "codegen.h" +#include "symboltable.h" +#include "stringbuffer.h" + +extern void yyerror(char* msg); + +static stringBuffer* staticVariableBuffer; +static stringBuffer* classInitBuffer; +static stringBuffer* currentMethodBuffer; +static stringBuffer* finishedMethodsBuffer; +static stringBuffer* mainBuffer; + +static int currentMethodBufferIndex; +static int currentMethodStackSize; +static int currentMethodStackSizeMax; +static int currentMethodNumberOfLocals; + +static int classInitBufferIndex; +static int classInitStackSize; +static int classInitStackSizeMax; + +static int labelCounter = 0; +static int global = 1; + +char tempString[MAX_LENGTH_OF_COMMAND]; + +extern char* className; /* from minako-syntax.y */ + +/* forward declarations */ +static void increaseStackby(int stackdiff); +char convertType(int type); + +void codegenInit() { + staticVariableBuffer = newStringBuffer(); + classInitBuffer = newStringBuffer(); + currentMethodBuffer = 0; + finishedMethodsBuffer = newStringBuffer(); + mainBuffer = newStringBuffer(); + + stringBufferAppend(mainBuffer, "; ------- Header --------------------------------------------"); + sprintf(tempString, ".class public synchronized %s", className); + stringBufferAppend(mainBuffer, tempString); + stringBufferAppend(mainBuffer, ".super java/lang/Object"); + stringBufferAppend(mainBuffer, "; -----------------------------------------------------------"); + stringBufferAppend(mainBuffer, ""); + + stringBufferAppend(finishedMethodsBuffer, "; ------- Constructor ---------------------------------------"); + stringBufferAppend(finishedMethodsBuffer, ".method 
public <init>()V"); + stringBufferAppend(finishedMethodsBuffer, "\t.limit stack 1"); + stringBufferAppend(finishedMethodsBuffer, "\t.limit locals 1"); + stringBufferAppend(finishedMethodsBuffer, "\taload_0"); + stringBufferAppend(finishedMethodsBuffer, "\tinvokenonvirtual java/lang/Object/<init>()V"); + stringBufferAppend(finishedMethodsBuffer, "\treturn"); + stringBufferAppend(finishedMethodsBuffer, ".end method"); + stringBufferAppend(finishedMethodsBuffer, "; -----------------------------------------------------------"); + stringBufferAppend(finishedMethodsBuffer, ""); + + stringBufferAppend(staticVariableBuffer, "; ------- Class Variables -----------------------------------"); + + stringBufferAppend(classInitBuffer, "; ------- Class Initializer ---------------------------------"); + stringBufferAppend(classInitBuffer, ".method static <clinit>()V"); + classInitBufferIndex = classInitBuffer->numberOfNextElement; + stringBufferAppend(classInitBuffer, "\t.limit locals 0"); + +} + +void codegenAppendCommand(char* cmd, int stackdiff) { + char tempString[MAX_LENGTH_OF_COMMAND]; + sprintf(tempString, "\t%s", cmd); + if (global) stringBufferAppend(classInitBuffer, tempString); + else stringBufferAppend(currentMethodBuffer, tempString); + increaseStackby(stackdiff); +} + +void codegenInsertCommand(int address, char* cmd, int stackdiff) { + char tempString[MAX_LENGTH_OF_COMMAND]; + sprintf(tempString, "\t%s", cmd); + if (global) stringBufferInsert(classInitBuffer, address, tempString); + else stringBufferInsert(currentMethodBuffer, address, tempString); + increaseStackby(stackdiff); +} + +void codegenAppendLabel(int label) { + char tempString[MAX_LENGTH_OF_COMMAND]; + sprintf(tempString, "Label%d:", label); + if (global) stringBufferAppend(classInitBuffer, tempString); + else stringBufferAppend(currentMethodBuffer, tempString); +} + +void codegenAddVariable(char* name, int type) { + /*fprintf(stderr, "add variable %s(%d) global=%d ", name, convertType(type), global);*/ + 
if (global) { + if (type == TYPE_INT) sprintf(tempString, ".field static %s %c", name, 'I'); + else if (type == TYPE_FLOAT) sprintf(tempString, ".field static %s %c", name, 'F'); + else if (type == TYPE_BOOLEAN) sprintf(tempString, ".field static %s %c", name, 'Z'); + else yyerror("compiler-intern error in codegenAddGlobalVariable().\n"); + stringBufferAppend(staticVariableBuffer, tempString); + } + else { + currentMethodNumberOfLocals++; + } +} + +int codegenGetNextLabel() { + return labelCounter++; +} + +int codegenGetCurrentAddress() { + if (global) return classInitBuffer->numberOfNextElement; + else return currentMethodBuffer->numberOfNextElement; +} + +void codegenEnterFunction(symtabEntry* entry) { + currentMethodBuffer = newStringBuffer(); + currentMethodStackSize = 0; + currentMethodStackSizeMax = 0; + labelCounter = 1; + global = 0; + + if (strcmp(entry->name, "main") == 0) { + if (entry->idtype != TYPE_VOID) yyerror("main has to be void.\n"); + currentMethodNumberOfLocals = 1; + symtabInsert(strdup("#main-param#"), TYPE_VOID, CLASS_FUNC); + stringBufferAppend(currentMethodBuffer, "; ------- Methode ---- void main() --------------------------"); + stringBufferAppend(currentMethodBuffer, ".method public static main([Ljava/lang/String;)V"); + } + else { + int i; + currentMethodNumberOfLocals = entry->paramIndex; + stringBufferAppend(currentMethodBuffer, "; ------- Methode -------------------------------------------"); + sprintf(tempString, ".method public static %s(", entry->name); + for (i=entry->paramIndex-1; i>=0; i--) { + int type = entry->params[i]->idtype; + tempString[strlen(tempString)+1] = 0; + tempString[strlen(tempString)] = convertType(type); + } + tempString[strlen(tempString)+2] = 0; + tempString[strlen(tempString)+1] = convertType(entry->idtype); + tempString[strlen(tempString)] = ')'; + stringBufferAppend(currentMethodBuffer, tempString); + } + currentMethodBufferIndex = currentMethodBuffer->numberOfNextElement; +} + +void 
codegenLeaveFunction() { + global = 1; + sprintf(tempString, "\t.limit locals %d", currentMethodNumberOfLocals); + stringBufferInsert(currentMethodBuffer, currentMethodBufferIndex, tempString); + sprintf(tempString, "\t.limit stack %d", currentMethodStackSizeMax); + stringBufferInsert(currentMethodBuffer, currentMethodBufferIndex, tempString); + stringBufferAppend(currentMethodBuffer, "\treturn"); + stringBufferAppend(currentMethodBuffer, ".end method"); + stringBufferAppend(currentMethodBuffer, "; -----------------------------------------------------------"); + stringBufferAppend(currentMethodBuffer, ""); + + stringBufferConcatenate(finishedMethodsBuffer, currentMethodBuffer); +} + + + +void codegenFinishCode() { + stringBufferAppend(staticVariableBuffer, "; -----------------------------------------------------------"); + stringBufferAppend(staticVariableBuffer, ""); + + sprintf(tempString, "\t.limit stack %d", classInitStackSizeMax); + stringBufferInsert(classInitBuffer, classInitBufferIndex, tempString); + stringBufferAppend(classInitBuffer, "\treturn"); + stringBufferAppend(classInitBuffer, ".end method"); + stringBufferAppend(classInitBuffer, "; -----------------------------------------------------------"); + + stringBufferConcatenate(mainBuffer, staticVariableBuffer); + stringBufferConcatenate(mainBuffer, finishedMethodsBuffer); + stringBufferConcatenate(mainBuffer, classInitBuffer); + + stringBufferPrint(mainBuffer); +} + +static void increaseStackby(int stackdiff) { + if (global) { + classInitStackSize += stackdiff; + if (classInitStackSize > classInitStackSizeMax) classInitStackSizeMax = classInitStackSize; + } + else { + currentMethodStackSize += stackdiff; + if (currentMethodStackSize > currentMethodStackSizeMax) currentMethodStackSizeMax = currentMethodStackSize; + } +} + +char convertType(int type) { + switch(type) { + case TYPE_VOID: return 'V'; + case TYPE_INT: return 'I'; + case TYPE_FLOAT: return 'F'; + case TYPE_BOOLEAN: return 'Z'; + default: 
yyerror("compiler-intern error in convertType().\n"); + } + return 0; /* to avoid compiler-warning */ +} diff --git a/bench/example.rb b/bench/example.rb new file mode 100644 index 0000000..c89d3ab --- /dev/null +++ b/bench/example.rb @@ -0,0 +1,10070 @@ +module CodeRay
+ module Scanners
+
+# Scanner that splits Ruby source text into tokens.
+# The patterns below interpolate IDENT, which is not defined in this class.
+class Ruby < Scanner
+
+ RESERVED_WORDS = [
+ 'and', 'def', 'end', 'in', 'or', 'unless', 'begin',
+ 'defined?', 'ensure', 'module', 'redo', 'super', 'until',
+ 'BEGIN', 'break', 'do', 'next', 'rescue', 'then',
+ 'when', 'END', 'case', 'else', 'for', 'retry',
+ 'while', 'alias', 'class', 'elsif', 'if', 'not', 'return',
+ 'undef', 'yield',
+ ]
+
+ # Scanner state to enter after a keyword. Presumably WordList.new(x)
+ # answers x for every word that was not added - confirm in WordList.
+ DEF_KEYWORDS = ['def']
+ MODULE_KEYWORDS = ['class', 'module']
+ DEF_NEW_STATE = WordList.new(:initial).
+ add(DEF_KEYWORDS, :def_expected).
+ add(MODULE_KEYWORDS, :module_expected)
+
+ # Keywords after which a following "/" starts a regexp literal.
+ WORDS_ALLOWING_REGEXP = [
+ 'and', 'or', 'not', 'while', 'until', 'unless', 'if', 'elsif', 'when'
+ ]
+ REGEXP_ALLOWED = WordList.new(false).
+ add(WORDS_ALLOWING_REGEXP, :set)
+
+ PREDEFINED_CONSTANTS = [
+ 'nil', 'true', 'false', 'self',
+ 'DATA', 'ARGV', 'ARGF', '__FILE__', '__LINE__',
+ ]
+
+ # Token kind looked up for an identifier-like word.
+ IDENT_KIND = WordList.new(:ident).
+ add(RESERVED_WORDS, :reserved).
+ add(PREDEFINED_CONSTANTS, :pre_constant)
+
+ METHOD_NAME = / #{IDENT} [?!]? /xo
+ METHOD_NAME_EX = /
+ #{METHOD_NAME} # common methods: split, foo=, empty?, gsub!
+ | \*\*? # multiplication and power
+ | [-+~]@? # plus, minus
+ | [\/%&|^`] # division, modulo or format strings, &and, |or, ^xor, `system`
+ | \[\]=? # array getter and setter
+ | <=?>? | >=? # comparison, rocket operator
+ | << | >> # append or shift left, shift right
+ | ===? # simple equality and case equality
+ /ox
+ GLOBAL_VARIABLE = / \$ (?: #{IDENT} | \d+ | [~&+`'=\/,;_.<>!@0$?*":F\\] | -[a-zA-Z_0-9] ) /ox
+
+ # String-like literals. The closing delimiter is optional in each pattern,
+ # so an unterminated literal still matches.
+ DOUBLEQ = / " [^"\#\\]* (?: (?: \#\{.*?\} | \#(?:$")? | \\. ) [^"\#\\]* )* "? /ox
+ SINGLEQ = / ' [^'\\]* (?: \\. [^'\\]* )* '? /ox
+ STRING = / #{SINGLEQ} | #{DOUBLEQ} /ox
+ SHELL = / ` [^`\#\\]* (?: (?: \#\{.*?\} | \#(?:$`)? | \\. ) [^`\#\\]* )* `? /ox
+ REGEXP = / \/ [^\/\#\\]* (?: (?: \#\{.*?\} | \#(?:$\/)? | \\. ) [^\/\#\\]* )* \/? /ox
+
+ DECIMAL = /\d+(?:_\d+)*/ # doesn't recognize 09 as octal error
+ OCTAL = /0_?[0-7]+(?:_[0-7]+)*/
+ HEXADECIMAL = /0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*/
+ BINARY = /0b[01]+(?:_[01]+)*/
+
+ EXPONENT = / [eE] [+-]? #{DECIMAL} /ox
+ FLOAT = / #{DECIMAL} (?: #{EXPONENT} | \. #{DECIMAL} #{EXPONENT}? ) /
+ INTEGER = /#{OCTAL}|#{HEXADECIMAL}|#{BINARY}|#{DECIMAL}/
+
+ # Resets the scanner; a regexp literal is not allowed at the very start.
+ def reset
+ super
+ @regexp_allowed = false
+ end
+
+ # Scans and returns the next token, or nil at the end of the input.
+ # Each branch that matches sets +kind+. When nothing matches, one character
+ # is consumed with getch and +kind+ stays :error.
+ #
+ # @regexp_allowed works with a one-token delay: while a token is scanned it
+ # may be assigned :set, and the last statement of this method reduces it to
+ # true or false, so the value only takes effect for the following token.
+ def next_token
+ return if @scanner.eos?
+
+ kind = :error
+ if @scanner.scan(/\s+/) # in every state
+ kind = :space
+ # whitespace keeps an active flag alive; a newline switches it on
+ @regexp_allowed = :set if @regexp_allowed or @scanner.matched.index(?\n) # delayed flag setting
+
+ elsif @state == :def_expected
+ # name that follows "def", optionally qualified with a receiver
+ if @scanner.scan(/ (?: (?:#{IDENT}(?:\.|::))* | (?:@@?|$)? #{IDENT}(?:\.|::) ) #{METHOD_NAME_EX} /ox)
+ kind = :method
+ @state = :initial
+ else
+ @scanner.getch
+ end
+ @state = :initial
+
+ elsif @state == :module_expected
+ # "<<" keeps the state, so the name after "class <<" is still expected
+ if @scanner.scan(/<</)
+ kind = :operator
+ else
+ if @scanner.scan(/ (?: #{IDENT} (?:\.|::))* #{IDENT} /ox)
+ kind = :method
+ else
+ @scanner.getch
+ end
+ @state = :initial
+ end
+
+ # The "elsif" below has no condition of its own: the complete nested
+ # if ... end expression serves as its condition and the branch body is empty.
+ elsif # state == :initial
+ # IDENTIFIERS, KEYWORDS
+ if @scanner.scan(GLOBAL_VARIABLE)
+ kind = :global_variable
+ elsif @scanner.scan(/ @@ #{IDENT} /ox)
+ kind = :class_variable
+ elsif @scanner.scan(/ @ #{IDENT} /ox)
+ kind = :instance_variable
+ elsif @scanner.scan(/ __END__\n ( (?!\#CODE\#) .* )? | \#[^\n]* | =begin(?=\s).*? \n=end(?=\s|\z)(?:[^\n]*)? /mx)
+ kind = :comment
+ elsif @scanner.scan(METHOD_NAME)
+ # NOTE(review): @last_token_dot is never assigned inside this class
+ if @last_token_dot
+ kind = :ident
+ else
+ matched = @scanner.matched
+ kind = IDENT_KIND[matched]
+ if kind == :ident and matched =~ /^[A-Z]/
+ kind = :constant
+ elsif kind == :reserved
+ @state = DEF_NEW_STATE[matched]
+ @regexp_allowed = REGEXP_ALLOWED[matched]
+ end
+ end
+
+ elsif @scanner.scan(STRING)
+ kind = :string
+ elsif @scanner.scan(SHELL)
+ kind = :shell
+ elsif @scanner.scan(/<<
+ (?:
+ ([a-zA-Z_0-9]+)
+ (?: .*? ^\1$ | .* )
+ |
+ -([a-zA-Z_0-9]+)
+ (?: .*? ^\s*\2$ | .* )
+ |
+ (["\'`]) (.+?) \3
+ (?: .*? ^\4$ | .* )
+ |
+ - (["\'`]) (.+?) \5
+ (?: .*? ^\s*\6$ | .* )
+ )
+ /mxo)
+ kind = :string
+ elsif @scanner.scan(/\//) and @regexp_allowed
+ # re-scan from the slash so that the whole literal becomes one token
+ @scanner.unscan
+ @scanner.scan(REGEXP)
+ kind = :regexp
+ # NOTE(review): the regexp literal on the next line is a bare expression
+ # whose value is discarded; it does not take part in scanning.
+/%(?:[Qqxrw](?:\([^)#\\\\]*(?:(?:#\{.*?\}|#|\\\\.)[^)#\\\\]*)*\)?|\[[^\]#\\\\]*(?:(?:#\{.*?\}|#|\\\\.)[^\]#\\\\]*)*\]?|\{[^}#\\\\]*(?:(?:#\{.*?\}|#|\\\\.)[^}#\\\\]*)*\}?|<[^>#\\\\]*(?:(?:#\{.*?\}|#|\\\\.)[^>#\\\\]*)*>?|([^a-zA-Z\\\\])(?:(?!\1)[^#\\\\])*(?:(?:#\{.*?\}|#|\\\\.)(?:(?!\1)[^#\\\\])*)*\1?)|\([^)#\\\\]*(?:(?:#\{.*?\}|#|\\\\.)[^)#\\\\]*)*\)?|\[[^\]#\\\\]*(?:(?:#\{.*?\}|#|\\\\.)[^\]#\\\\]*)*\]?|\{[^}#\\\\]*(?:(?:#\{.*?\}|#|\\\\.)[^}#\\\\]*)*\}?|<[^>#\\\\]*(?:(?:#\{.*?\}|#|\\\\.)[^>#\\\\]*)*>?|([^a-zA-Z\s\\\\])(?:(?!\2)[^#\\\\])*(?:(?:#\{.*?\}|#|\\\\.)(?:(?!\2)[^#\\\\])*)*\2?|\\\\[^#\\\\]*(?:(?:#\{.*?\}|#)[^#\\\\]*)*\\\\?)/
+ elsif @scanner.scan(/:(?:#{GLOBAL_VARIABLE}|#{METHOD_NAME_EX}|#{STRING})/ox)
+ kind = :symbol
+ elsif @scanner.scan(/
+ \? (?:
+ [^\s\\]
+ |
+ \\ (?:M-\\C-|C-\\M-|M-\\c|c\\M-|c|C-|M-))? (?: \\ (?: . | [0-7]{3} | x[0-9A-Fa-f][0-9A-Fa-f] )
+ )
+ /mox)
+ kind = :integer
+
+ elsif @scanner.scan(/ [-+*\/%=<>;,|&!()\[\]{}~?] | \.\.?\.? | ::? /x)
+ kind = :operator
+ # after these operators a "/" starts a regexp literal
+ @regexp_allowed = :set if @scanner.matched[-1,1] =~ /[~=!<>|&^,\(\[+\-\/\*%]\z/
+ elsif @scanner.scan(FLOAT)
+ kind = :float
+ elsif @scanner.scan(INTEGER)
+ kind = :integer
+ else
+ @scanner.getch
+ end
+ end
+
+ token = Token.new @scanner.matched, kind
+
+ # append the option letters that follow a regexp literal to its text
+ if kind == :regexp
+ token.text << @scanner.scan(/[eimnosux]*/)
+ end
+
+ @regexp_allowed = (@regexp_allowed == :set) # delayed flag setting
+
+ token
+ end
+end
+
+# NOTE(review): presumably makes the Ruby scanner available under the names
+# 'ruby' and 'rb'; `register` is defined outside this chunk - confirm there.
+register Ruby, 'ruby', 'rb'
+
+ end
+end
+class Set
+ include Enumerable
+
+ # Creates a new set containing the given objects.
+ def self.[](*ary)
+ new(ary)
+ end
+
+ # Creates a new set containing the elements of the given enumerable
+ # object.
+ #
+ # If a block is given, the elements of enum are preprocessed by the
+ # given block.
+ def initialize(enum = nil, &block) # :yields: o
+ @hash ||= Hash.new
+
+ enum.nil? and return
+
+ if block
+ enum.each { |o| add(block[o]) }
+ else
+ merge(enum)
+ end
+ end
+
+ # Copy internal hash.
+ def initialize_copy(orig)
+ @hash = orig.instance_eval{@hash}.dup
+ end
+
+ # Returns the number of elements.
+ def size
+ @hash.size
+ end
+ alias length size
+
+ # Returns true if the set contains no elements.
+ def empty?
+ @hash.empty?
+ end
+
+ # Removes all elements and returns self.
+ def clear
+ @hash.clear
+ self
+ end
+
+ # Replaces the contents of the set with the contents of the given
+ # enumerable object and returns self.
+ def replace(enum)
+ if enum.class == self.class
+ @hash.replace(enum.instance_eval { @hash })
+ else
+ enum.is_a?(Enumerable) or raise ArgumentError, "value must be enumerable"
+ clear
+ enum.each { |o| add(o) }
+ end
+
+ self
+ end
+
+ # Converts the set to an array. The order of elements is uncertain.
+ def to_a
+ @hash.keys
+ end
+
+ def flatten_merge(set, seen = Set.new)
+ set.each { |e|
+ if e.is_a?(Set)
+ if seen.include?(e_id = e.object_id)
+ raise ArgumentError, "tried to flatten recursive Set"
+ end
+
+ seen.add(e_id)
+ flatten_merge(e, seen)
+ seen.delete(e_id)
+ else
+ add(e)
+ end
+ }
+
+ self
+ end
+ protected :flatten_merge
+
+ # Returns a new set that is a copy of the set, flattening each
+ # containing set recursively.
+ def flatten
+ self.class.new.flatten_merge(self)
+ end
+
+ # Equivalent to Set#flatten, but replaces the receiver with the
+ # result in place. Returns nil if no modifications were made.
+ def flatten!
+ if detect { |e| e.is_a?(Set) }
+ replace(flatten())
+ else
+ nil
+ end
+ end
+
+ # Returns true if the set contains the given object.
+ def include?(o)
+ @hash.include?(o)
+ end
+ alias member? include?
+
+ # Returns true if the set is a superset of the given set.
+ def superset?(set)
+ set.is_a?(Set) or raise ArgumentError, "value must be a set"
+ return false if size < set.size
+ set.all? { |o| include?(o) }
+ end
+
+ # Returns true if the set is a proper superset of the given set.
+ def proper_superset?(set)
+ set.is_a?(Set) or raise ArgumentError, "value must be a set"
+ return false if size <= set.size
+ set.all? { |o| include?(o) }
+ end
+
+ # Returns true if the set is a subset of the given set.
def subset?(set)
  raise ArgumentError, "value must be a set" unless set.is_a?(Set)

  # A larger receiver can never fit inside +set+.
  set.size >= size && all? { |member| set.include?(member) }
end
+
+ # Returns true if the set is a proper subset of the given set.
def proper_subset?(set)
  raise ArgumentError, "value must be a set" unless set.is_a?(Set)

  # "Proper" requires +set+ to be strictly larger than the receiver.
  set.size > size && all? { |member| set.include?(member) }
end
+
+ # Calls the given block once for each element in the set, passing
+ # the element as parameter.
def each
  # Members are the keys of the backing hash; the values are never exposed.
  @hash.each_key do |member|
    yield member
  end
  self
end
+
+ # Adds the given object to the set and returns self. Use +merge+ to
+ # add several elements at once.
def add(o)
  # The stored value is irrelevant; only the key marks membership.
  @hash[o] = true
  self
end
+ alias << add
+
+ # Adds the given object to the set and returns self. If the
+ # object is already in the set, returns nil.
def add?(o)
  # nil signals "already present"; otherwise behave exactly like #add.
  include?(o) ? nil : add(o)
end
+
+ # Deletes the given object from the set and returns self. Use +subtract+ to
+ # delete several items at once.
def delete(o)
  # Removing an absent member is a silent no-op.
  @hash.delete(o)
  self
end
+
+ # Deletes the given object from the set and returns self. If the
+ # object is not in the set, returns nil.
def delete?(o)
  # Evaluates to nil when the member is absent, otherwise to #delete's result.
  delete(o) if include?(o)
end
+
+ # Deletes every element of the set for which block evaluates to
+ # true, and returns self.
def delete_if
  # The backing hash yields (key, value) pairs; only the key is a member.
  @hash.delete_if { |member, _| yield(member) }
  self
end
+
+ # Do collect() destructively.
def collect!
  # Build the mapped members in a scratch set first, so the receiver is not
  # modified while it is being iterated.
  mapped = self.class.new
  each do |member|
    mapped << yield(member)
  end
  replace(mapped)
end
+ alias map! collect!
+
+ # Equivalent to Set#delete_if, but returns nil if no changes were
+ # made.
def reject!
  size_before = size
  delete_if { |member| yield(member) }
  # An unchanged size means the block matched nothing.
  return nil if size == size_before

  self
end
+
+ # Merges the elements of the given enumerable object to the set and
+ # returns self.
def merge(enum)
  # Another Set can be merged hash-to-hash without going through #add.
  if enum.is_a?(Set)
    @hash.update(enum.instance_eval { @hash })
    return self
  end

  raise ArgumentError, "value must be enumerable" unless enum.is_a?(Enumerable)
  enum.each { |element| add(element) }
  self
end
+
+ # Deletes every element that appears in the given enumerable object
+ # and returns self.
def subtract(enum)
  raise ArgumentError, "value must be enumerable" unless enum.is_a?(Enumerable)

  enum.each do |element|
    delete(element)
  end
  self
end
+
+ # Returns a new set built by merging the set and the elements of the
+ # given enumerable object.
def |(enum)
  raise ArgumentError, "value must be enumerable" unless enum.is_a?(Enumerable)

  # Work on a copy so the receiver itself is left untouched.
  union = dup
  union.merge(enum)
end
+ alias + | ##
+ alias union | ##
+
+ # Returns a new set built by duplicating the set, removing every
+ # element that appears in the given enumerable object.
def -(enum)
  raise ArgumentError, "value must be enumerable" unless enum.is_a?(Enumerable)

  # Work on a copy so the receiver itself is left untouched.
  remainder = dup
  remainder.subtract(enum)
end
+ alias difference - ##
+
+ # Returns a new set containing elements common to the set and the
+ # given enumerable object.
def &(enum)
  raise ArgumentError, "value must be enumerable" unless enum.is_a?(Enumerable)

  # Collect into a fresh set of the receiver's class.
  common = self.class.new
  enum.each do |element|
    common.add(element) if include?(element)
  end
  common
end
+ alias intersection & ##
+
+ # Returns a new set containing elements exclusive between the set
+ # and the given enumerable object. (set ^ enum) is equivalent to
+ # ((set | enum) - (set & enum)).
def ^(enum)
  raise ArgumentError, "value must be enumerable" unless enum.is_a?(Enumerable)

  # Toggle each incoming element in a copy of the receiver: members present
  # on both sides drop out, members present on only one side remain.
  result = dup
  enum.each do |element|
    if result.include?(element)
      result.delete(element)
    else
      result.add(element)
    end
  end
  result
end
+
+ # Returns true if two sets are equal. The equality of each couple
+ # of elements is defined according to Object#eql?.
def ==(set)
  return true if equal?(set)
  return false unless set.is_a?(Set) && size == set.size

  # With equal sizes, containing every member of +set+ implies equality.
  hash = @hash.dup
  set.all? { |member| hash.include?(member) }
end
+
def hash # :nodoc:
  # Delegate to the backing hash's own hash value.
  @hash.hash
end
+
def eql?(o) # :nodoc:
  # Only another Set can be eql?; then compare the two backing hashes.
  o.is_a?(Set) && @hash.eql?(o.instance_eval { @hash })
end
+
+ # Classifies the set by the return value of the given block and
+ # returns a hash of {value => set of elements} pairs. The block is
+ # called once for each element of the set, passing the element as
+ # parameter.
+ #
+ # e.g.:
+ #
+ # require 'set'
+ # files = Set.new(Dir.glob("*.rb"))
+ # hash = files.classify { |f| File.mtime(f).year }
+ # p hash # => {2000=>#<Set: {"a.rb", "b.rb"}>,
+ # # 2001=>#<Set: {"c.rb", "d.rb", "e.rb"}>,
+ # # 2002=>#<Set: {"f.rb"}>}
def classify # :yields: o
  groups = {}

  each do |element|
    key = yield(element)
    # The first element seen for a key creates that key's bucket.
    groups[key] = self.class.new unless groups[key]
    groups[key].add(element)
  end

  groups
end
+
+ # Divides the set into a set of subsets according to the commonality
+ # defined by the given block.
+ #
+ # If the arity of the block is 2, elements o1 and o2 are in common
+ # if block.call(o1, o2) is true. Otherwise, elements o1 and o2 are
+ # in common if block.call(o1) == block.call(o2).
+ #
+ # e.g.:
+ #
+ # require 'set'
+ # numbers = Set[1, 3, 4, 6, 9, 10, 11]
+ # set = numbers.divide { |i,j| (i - j).abs == 1 }
+ # p set # => #<Set: {#<Set: {1}>,
+ # # #<Set: {11, 9, 10}>,
+ # # #<Set: {3, 4}>,
+ # # #<Set: {6}>}>
def divide(&func)
  # Any block that does not take exactly two arguments defines commonality
  # by equal return values, which is what classify already computes.
  return Set.new(classify(&func).values) unless func.arity == 2

  require 'tsort'

  # +dig+ is an adjacency hash (element => related elements) whose singleton
  # class mixes in TSort so its strongly connected components can be listed.
  class << dig = {} # :nodoc:
    include TSort

    alias tsort_each_node each_key
    def tsort_each_child(node, &block)
      fetch(node).each(&block)
    end
  end

  each do |u|
    related = []
    dig[u] = related
    each { |v| related << v if func.call(u, v) }
  end

  components = Set.new()
  dig.each_strongly_connected_component do |css|
    components.add(self.class.new(css))
  end
  components
end
+
+ InspectKey = :__inspect_key__ # :nodoc:
+
+ # Returns a string containing a human-readable representation of the
+ # set. ("#<Set: {element1, element2, ...}>")
def inspect
  # Per-thread stack of the object ids of the sets currently being inspected;
  # finding our own id there means the set (indirectly) contains itself.
  active_ids = (Thread.current[InspectKey] ||= [])
  return sprintf('#<%s: {...}>', self.class.name) if active_ids.include?(object_id)

  active_ids << object_id
  begin
    # inspect[1..-2] strips the surrounding brackets of the array form.
    sprintf('#<%s: {%s}>', self.class, to_a.inspect[1..-2])
  ensure
    active_ids.pop
  end
end
+
def pretty_print(pp) # :nodoc:
  opening = sprintf('#<%s: {', self.class.name)
  pp.text opening
  pp.nest(1) do
    pp.seplist(self) { |member| pp.pp member }
  end
  pp.text "}>"
end
+
def pretty_print_cycle(pp) # :nodoc:
  # An empty set prints as "{}", anything else is abbreviated to "{...}".
  body = empty? ? '' : '...'
  pp.text sprintf('#<%s: {%s}>', self.class.name, body)
end
+end
+
+ # SortedSet implements a set whose elements are kept in sorted order. See Set.
class SortedSet < Set
  # Set to true once SortedSet.setup has installed the real implementation.
  @@setup = false

  class << self
    def [](*ary) # :nodoc:
      new(ary)
    end

    # Installs the real instance methods on first use. When the 'rbtree'
    # library can be loaded, an RBTree is used as backing store; otherwise the
    # set keeps a plain hash plus a lazily built sorted key cache (@keys)
    # that every mutating method resets.
    def setup # :nodoc:
      @@setup and return

      begin
        require 'rbtree'

        module_eval %{
          def initialize(*args, &block)
            @hash = RBTree.new
            super
          end
        }
      rescue LoadError
        module_eval %{
          def initialize(*args, &block)
            @keys = nil
            super
          end

          def clear
            @keys = nil
            super
          end

          def replace(enum)
            @keys = nil
            super
          end

          def add(o)
            @keys = nil
            @hash[o] = true
            self
          end
          alias << add

          def delete(o)
            @keys = nil
            @hash.delete(o)
            self
          end

          def delete_if
            n = @hash.size
            @hash.delete_if { |o,| yield(o) }
            @keys = nil if @hash.size != n
            self
          end

          def merge(enum)
            @keys = nil
            super
          end

          # Yields the members in sorted order and returns self, matching
          # the return value of Set#each.
          def each
            to_a.each { |o| yield(o) }
            self
          end

          def to_a
            (@keys = @hash.keys).sort! unless @keys
            @keys
          end
        }
      end

      @@setup = true
    end
  end

  # Bootstrap constructor: SortedSet.setup replaces #initialize via
  # module_eval, so the call below dispatches to the newly installed
  # definition rather than recursing into this one.
  def initialize(*args, &block) # :nodoc:
    SortedSet.setup
    initialize(*args, &block)
  end
end
+
module Enumerable
  # Builds a collection of class +klass+ (Set by default) from the receiver.
  # Extra arguments and an optional block are forwarded to klass.new.
  def to_set(klass = Set, *args, &block)
    klass.new(self, *args, &block)
  end
end
+
+# =begin
+# == RestricedSet class
+# RestricedSet implements a set with restrictions defined by a given
+# block.
+#
+# === Super class
+# Set
+#
+# === Class Methods
+# --- RestricedSet::new(enum = nil) { |o| ... }
+# --- RestricedSet::new(enum = nil) { |rset, o| ... }
+# Creates a new restricted set containing the elements of the given
+# enumerable object. Restrictions are defined by the given block.
+#
+# If the block's arity is 2, it is called with the RestrictedSet
+# itself and an object to see if the object is allowed to be put in
+# the set.
+#
+# Otherwise, the block is called with an object to see if the object
+# is allowed to be put in the set.
+#
+# === Instance Methods
+# --- restriction_proc
+# Returns the restriction procedure of the set.
+#
+# =end
+#
+# class RestricedSet < Set
+# def initialize(*args, &block)
+# @proc = block or raise ArgumentError, "missing a block"
+#
+# if @proc.arity == 2
+# instance_eval %{
+# def add(o)
+# @hash[o] = true if @proc.call(self, o)
+# self
+# end
+# alias << add
+#
+# def add?(o)
+# if include?(o) || !@proc.call(self, o)
+# nil
+# else
+# @hash[o] = true
+# self
+# end
+# end
+#
+# def replace(enum)
+# enum.is_a?(Enumerable) or raise ArgumentError, "value must be enumerable"
+# clear
+# enum.each { |o| add(o) }
+#
+# self
+# end
+#
+# def merge(enum)
+# enum.is_a?(Enumerable) or raise ArgumentError, "value must be enumerable"
+# enum.each { |o| add(o) }
+#
+# self
+# end
+# }
+# else
+# instance_eval %{
+# def add(o)
+# if @proc.call(o)
+# @hash[o] = true
+# end
+# self
+# end
+# alias << add
+#
+# def add?(o)
+# if include?(o) || !@proc.call(o)
+# nil
+# else
+# @hash[o] = true
+# self
+# end
+# end
+# }
+# end
+#
+# super(*args)
+# end
+#
+# def restriction_proc
+# @proc
+# end
+# end
+
+ # When this file is executed directly (not required), evaluate the text read
+ # from DATA, labelled with this script's name and a line offset from here.
+ if $0 == __FILE__
+ eval DATA.read, nil, $0, __LINE__+4
+ end
+
+# = rweb - CGI Support Library
+#
+# Author:: Johannes Barre (mailto:rweb@igels.net)
+# Copyright:: Copyright (c) 2003, 04 by Johannes Barre
+# License:: GNU Lesser General Public License (COPYING, http://www.gnu.org/copyleft/lesser.html)
+# Version:: 0.1.0
+# CVS-ID:: $Id: rweb.rb 6 2004-06-16 15:56:26Z igel $
+#
+# == What is Rweb?
+# Rweb is a replacement for the cgi class included in the ruby distribution.
+#
+# == How to use
+#
+# === Basics
+#
+# This class is made to be as easy as possible to use. An example:
+#
+# require "rweb"
+#
+# web = Rweb.new
+# web.out do
+# web.puts "Hello world!"
+# end
+#
+ # The visitor will get a simple "Hello world!" in his browser. Please note
+ # that Rweb won't add HTML tags for you, so you had better do something like this:
+#
+# require "rweb"
+#
+# web = Rweb.new
+# web.out do
+# web.puts "<html><body>Hello world!</body></html>"
+# end
+#
+# === Set headers
+# Of course, it's also possible to tell the browser, that the content of this
+# page is plain text instead of html code:
+#
+# require "rweb"
+#
+# web = Rweb.new
+# web.out do
+# web.header("content-type: text/plain")
+# web.puts "Hello plain world!"
+# end
+#
+ # Please remember, headers can't be set after the page content has been sent.
+ # You have to set all necessary headers before the first puts or print. It's
+ # possible to cache the content until everything is complete. Doing it this
+ # way, you can set headers everywhere.
+ #
+ # If you set a header twice, the second header will replace the first one. The
+ # header name is not case-sensitive; it will always be converted into the
+ # capitalised form suggested by the w3c (http://w3.org)
+#
+# === Set cookies
+ # Setting cookies is quite easy:
+ # require 'rweb'
+#
+# web = Rweb.new
+# Cookie.new("Visits", web.cookies['visits'].to_i +1)
+# web.out do
+# web.puts "Welcome back! You visited this page #{web.cookies['visits'].to_i +1} times"
+# end
+#
+# See the class Cookie for more details.
+#
+# === Get form and cookie values
+# There are four ways to submit data from the browser to the server and your
+# ruby script: via GET, POST, cookies and file upload. Rweb doesn't support
+# file upload by now.
+#
+ # require 'rweb'
+#
+# web = Rweb.new
+# web.out do
+# web.print "action: #{web.get['action']} "
+# web.puts "The value of the cookie 'visits' is #{web.cookies['visits']}"
+# web.puts "The post parameter 'test['x']' is #{web.post['test']['x']}"
+# end
+
# Library version, and the product token built from it.
RWEB_VERSION = "0.1.0"
RWEB = "rweb/" + RWEB_VERSION
+
+#require 'rwebcookie' -> edit by bunny :-)
+
class Rweb
  # All parameters submitted via the GET method are available in the
  # attribute get. This is a Hash, where every parameter is available as a
  # key-value pair.
  #
  # If your input tag has a name like this one, its value will be available
  # as web.get["fieldname"]
  #   <input name="fieldname">
  # You can submit values as a Hash
  #   <input name="text['index']">
  #   <input name="text['index2']">
  # will be available as
  #   web.get["text"]["index"]
  #   web.get["text"]["index2"]
  # Integers are also possible
  #   <input name="int[2]">
  #   <input name="int[3]['hi']">
  # will be available as
  #   web.get["int"][2]
  #   web.get["int"][3]["hi"]
  # If you specify no index, the lowest unused index will be used:
  #   <input name="int[]"><!-- First Field -->
  #   <input name="int[]"><!-- Second one -->
  # will be available as
  #   web.get["int"][0] # First Field
  #   web.get["int"][1] # Second one
  # Please notice, this doesn't work like you might expect:
  #   <input name="text[index]">
  # It will not be available as web.get["text"]["index"] but as
  #   web.get["text[index]"]
  attr_reader :get

  # All parameters submitted via POST are available in the attribute post. It
  # works like the get attribute.
  #   <input name="text[0]">
  # will be available as
  #   web.post["text"][0]
  attr_reader :post

  # All cookies submitted by the browser are available in cookies. This is a
  # Hash, where every cookie is a key-value pair.
  attr_reader :cookies

  # The browser identification, taken from HTTP_USER_AGENT (nil if absent).
  attr_reader :user_agent

  # The IP address of the client, taken from REMOTE_ADDR (nil if absent).
  attr_reader :remote_addr

  # Default reason phrase for every status code known to this library.
  REASON_PHRASES = {
    100 => "Continue",
    101 => "Switching Protocols",
    200 => "OK",
    201 => "Created",
    202 => "Accepted",
    203 => "Non-Authoritative Information",
    204 => "No Content",
    205 => "Reset Content",
    206 => "Partial Content",
    300 => "Multiple Choices",
    301 => "Moved Permanently",
    302 => "Found",
    303 => "See Other",
    304 => "Not Modified",
    305 => "Use Proxy",
    307 => "Temporary Redirect",
    400 => "Bad Request",
    401 => "Unauthorized",
    402 => "Payment Required",
    403 => "Forbidden",
    404 => "Not Found",
    405 => "Method Not Allowed",
    406 => "Not Acceptable",
    407 => "Proxy Authentication Required",
    408 => "Request Time-out",
    409 => "Conflict",
    410 => "Gone",
    411 => "Length Required",
    412 => "Precondition Failed",
    413 => "Request Entity Too Large",
    414 => "Request-URI Too Large",
    415 => "Unsupported Media Type",
    416 => "Requested range not satisfiable",
    417 => "Expectation Failed",
    500 => "Internal Server Error",
    501 => "Not Implemented",
    502 => "Bad Gateway",
    503 => "Service Unavailable",
    504 => "Gateway Time-out",
    505 => "HTTP Version not supported"
  }.freeze

  # Matches parameter names like name[0], name['text'], name["text"] or name[].
  PARAM_INDEX_PATTERN = /^(.+)\[("[^\]]*"|'[^\]]*'|[0-9]*)\]$/

  # Characters replaced when exception text is embedded in the HTML error page.
  HTML_ESCAPES = { "&" => "&amp;", "<" => "&lt;", ">" => "&gt;", '"' => "&quot;" }.freeze

  # Creates a new Rweb object. This should only be done once. You can set
  # various options via the settings hash (default keys are added to it):
  #
  # "cache" => true: Everything your script sends to the client will be cached
  # until the end of the out block or until flush is called. This way, you
  # can modify headers and cookies even after printing something.
  #
  # "safe" => level: Raises $SAFE to this level (default 1). If $SAFE is
  # already higher, or is not an Integer level at all, it is left alone.
  #
  # "silent" => true: Normally Rweb automatically adds a header like
  # "X-Powered-By: Rweb/x.x.x (Ruby/y.y.y)". This option suppresses it.
  #
  # "hide errors" => true: Do not send exception details to the client.
  #
  # "get seperator" / "post seperator": separator (String or Regexp) used to
  # split the query string / POST body instead of the default /[&;]/.
  #
  # Raises TypeError if settings is not a Hash.
  def initialize(settings = {})
    @header = {}
    @cookies = {}
    @get = {}
    @post = {}

    # Internal attributes
    @status = nil
    @reasonPhrase = nil
    @setcookies = []
    @output_started = false
    @output_allowed = false

    @mod_ruby = false
    @env = ENV.to_hash

    if defined?(MOD_RUBY)
      @output_method = "mod_ruby"
      @mod_ruby = true
    elsif @env['SERVER_SOFTWARE'] =~ /^Microsoft-IIS/i
      @output_method = "nph"
    else
      @output_method = "ph"
    end

    unless settings.is_a?(Hash)
      raise TypeError, "settings must be a Hash"
    end
    @settings = settings
    @settings["safe"] = 1 unless @settings.has_key?("safe")
    @settings["cache"] = false unless @settings.has_key?("cache")

    # Only compare when $SAFE really holds an Integer level; otherwise the
    # comparison itself would raise.
    if $SAFE.is_a?(Integer) && $SAFE < @settings["safe"]
      $SAFE = @settings["safe"]
    end

    # mod_ruby sets no QUERY_STRING variable, if no GET-Parameters are given
    @env["QUERY_STRING"] = "" unless @env.has_key?("QUERY_STRING")

    get_separator = @settings.has_key?("get seperator") ? @settings['get seperator'] : /[&;]/
    @env['QUERY_STRING'].split(get_separator).each { |arg| store_param(@get, arg) }

    if @env['REQUEST_METHOD'] == "POST"
      if @env['CONTENT_TYPE'] == "application/x-www-form-urlencoded" && @env.has_key?('CONTENT_LENGTH')
        post_separator = @settings.has_key?("post seperator") ? @settings['post seperator'] : /[&;]/
        # read returns nil at end of input; to_s turns that into an empty body
        body = $stdin.read(@env['CONTENT_LENGTH'].to_i).to_s
        body.split(post_separator).each { |arg| store_param(@post, arg) }
      else
        # Maybe we should print a warning here?
        $stderr.print("Unidentified form data recived and discarded.")
      end
    end

    if @env.has_key?("HTTP_COOKIE")
      @env['HTTP_COOKIE'].split(/; ?/).each do |c|
        cookie_key, cookie_val = c.split(/=/, 2)
        @cookies[Rweb::unescape(cookie_key)] = Rweb::unescape(cookie_val)
      end
    end

    @user_agent = @env['HTTP_USER_AGENT']
    @remote_addr = @env['REMOTE_ADDR']
  end

  # Prints a String to the client. If caching is enabled, the String will be
  # buffered until the out block ends or flush is called. Returns nil.
  # Raises RuntimeError when called outside of an out block.
  def print(str = "")
    unless @output_allowed
      raise "You just can write to output inside of a Rweb::out-block"
    end

    if @settings["cache"]
      @buffer << str.to_s
    else
      sendHeaders unless @output_started
      $stdout.print(str)
    end
    nil
  end

  # Prints a value to the client and adds a line break at the end. Please
  # remember, that a line break is not visible in HTML, use the <br> HTML-Tag
  # for this. Non-String values are converted with to_s.
  def puts(str = "")
    self.print(str.to_s + "\n")
  end

  # Alias to print.
  def write(str = "")
    self.print(str)
  end

  # If caching is enabled, all cached data are sent to the client and the
  # cache is emptied. Raises RuntimeError outside of an out block.
  def flush
    unless @output_allowed
      raise "You can't use flush outside of a Rweb::out-block"
    end
    buffer = @buffer.join

    sendHeaders unless @output_started
    $stdout.print(buffer)

    @buffer = []
  end

  # Sets one or more headers. All headers are kept back until the first body
  # data is sent to the client. If the same header is set twice, only the
  # last value is sent.
  #
  # Example:
  #   web.header("Last-Modified: Mon, 16 Feb 2004 20:15:41 GMT")
  #   web.header("Location: http://www.ruby-lang.org")
  #
  # You can specify more than one header at a time:
  #   web.header("Content-Type: text/plain\nContent-Length: 383")
  # or
  #   web.header(["Content-Type: text/plain", "Content-Length: 383"])
  #
  # A line of the form "HTTP/1.x NNN ..." or "Status: NNN ..." is handed to
  # status instead of being stored as a header. The argument is not modified.
  # Raises RuntimeError once output has started or outside of an out block,
  # ArgumentError if str is neither a String nor an Array.
  def header(str)
    if @output_started
      raise "HTTP-Headers are already send. You can't change them after output has started!"
    end
    unless @output_allowed
      raise "You just can set headers inside of a Rweb::out-block"
    end

    if str.is_a?(Array)
      str.each { |value| header(value) }
    elsif str.is_a?(String)
      lines = str.split(/\n/)
      if lines.length > 1
        lines.each { |value| header(value) }
      elsif lines.length == 1
        set_header_line(lines[0].delete("\r"))
      end
    else
      raise ArgumentError, "Argument of header must be a String or an Array of Strings", caller
    end
    nil
  end

  # Changes the status of this page. There are several codes like "200 OK",
  # "302 Found", "404 Not Found" or "500 Internal Server Error". A list of
  # all codes is available at
  # http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10
  #
  # You can just send the code number; the reason phrase will be added
  # automatically if not specified. If you set the status twice or more, only
  # the last status will be sent.
  # Examples:
  #   web.status("401 Unauthorized")
  #   web.status("410 Sad but true, this lonely page is gone :(")
  #   web.status(206)
  #   web.status("400")
  #
  # An Integer code unknown to REASON_PHRASES is accepted with an empty
  # reason phrase; a String code without phrase must be a known one.
  # Raises ArgumentError for unparsable or wrongly typed arguments.
  def status(str)
    if @output_started
      raise "HTTP-Headers are already send. You can't change them after output has started!"
    end
    unless @output_allowed
      raise "You just can set headers inside of a Rweb::out-block"
    end

    if str.is_a?(Integer)
      @status = str
      # Replace any phrase left over from an earlier call.
      @reasonPhrase = REASON_PHRASES[str]
    elsif str.is_a?(String)
      p1 = /^([0-9]{3}) ?(.*)$/
      p2 = /^HTTP\/1\.[01] ([0-9]{3}) ?(.*)$/
      p3 = /^status: ([0-9]{3}) ?(.*)$/i

      match = p1.match(str) || p2.match(str) || p3.match(str)
      raise ArgumentError, "Invalid argument", caller if match.nil?

      @status = match[1].to_i
      if match[2] != ""
        @reasonPhrase = match[2]
      else
        @reasonPhrase = getReasonPhrase(@status)
      end
    else
      raise ArgumentError, "Argument of status must be integer or string", caller
    end
  end

  # Handles the output of your content and rescues all exceptions. Send all
  # data in the block to this method. For example:
  #   web.out do
  #     web.header("Content-Type: text/plain")
  #     web.puts("Hello, plain world!")
  #   end
  #
  # A rescued exception is logged to $stderr and, unless the "hide errors"
  # setting is true, reported to the client (HTML-escaped on HTML pages).
  def out
    @output_allowed = true
    @buffer = [] # We use an array as buffer, because it's more performant :)

    begin
      yield
    rescue Exception => exception
      $stderr.puts "Ruby exception rescued (#{exception.class}): #{exception.message}"
      $stderr.puts exception.backtrace.join("\n")

      unless @output_started
        status(500)
        @header = {}
      end

      unless (@settings.has_key?("hide errors") and @settings["hide errors"] == true)
        unless @output_started
          self.header("Content-Type: text/html")
          self.puts "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Strict//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">"
          self.puts "<html>"
          self.puts "<head>"
          self.puts "<title>500 Internal Server Error</title>"
          self.puts "</head>"
          self.puts "<body>"
        end
        if @header.has_key?("content-type") and (@header["content-type"] =~ /^text\/html/i) == 0
          self.puts "<h1>Internal Server Error</h1>"
          self.puts "<p>The server encountered an exception and was unable to complete your request.</p>"
          self.puts "<p>The exception has provided the following information:</p>"
          self.puts "<pre style=\"background: #FFCCCC; border: black solid 2px; margin-left: 2cm; margin-right: 2cm; padding: 2mm;\"><b>#{escape_html(exception.class)}</b>: #{escape_html(exception.message)} <b>on</b>"
          self.puts
          self.puts "#{escape_html(exception.backtrace.join("\n"))}</pre>"
          self.puts "</body>"
          self.puts "</html>"
        else
          self.puts "The server encountered an exception and was unable to complete your request"
          self.puts "The exception has provided the following information:"
          self.puts "#{exception.class}: #{exception.message}"
          self.puts
          self.puts exception.backtrace.join("\n")
        end
      end
    end

    if @settings["cache"]
      buffer = @buffer.join

      unless @output_started
        unless @header.has_key?("content-length")
          # Content-Length counts bytes, not characters.
          body_size = buffer.respond_to?(:bytesize) ? buffer.bytesize : buffer.length
          self.header("content-length: #{body_size}")
        end

        sendHeaders
      end
      $stdout.print(buffer)
    elsif !@output_started
      sendHeaders
    end
    @output_allowed = false
  end

  # Decodes URL encoded data, %20 for example stands for a space and "+" too.
  # Returns a new String (the argument is left untouched), or nil if str is
  # not a String. Only "%" followed by two hex digits is decoded.
  def Rweb.unescape(str)
    return nil unless str.is_a?(String)

    str.tr("+", " ").gsub(/%[0-9a-fA-F]{2}/) do |s|
      s[1, 2].hex.chr
    end
  end

  protected

  # Sends the status line, all headers and the cookies, and marks output as
  # started. After this no more cookies can be set or modified.
  def sendHeaders
    Cookie.disallow # no more cookies can be set or modified
    if !(@settings.has_key?("silent") and @settings["silent"] == true) and !@header.has_key?("x-powered-by")
      if @mod_ruby
        header("x-powered-by: #{RWEB} (Ruby/#{RUBY_VERSION}, #{MOD_RUBY})")
      else
        header("x-powered-by: #{RWEB} (Ruby/#{RUBY_VERSION})")
      end
    end

    if @output_method == "ph"
      if ((@status == nil or @status == 200) and !@header.has_key?("content-type") and !@header.has_key?("location"))
        header("content-type: text/html")
      end

      if @status != nil
        $stdout.print "Status: #{@status} #{@reasonPhrase}\r\n"
      end

      @header.each do |key, value|
        $stdout.print "#{canonical_header_name(key)}: #{value}\r\n"
      end
      cookies = Cookie.getHttpHeader # Get all cookies as an HTTP Header
      if cookies
        $stdout.print cookies
      end

      $stdout.print "\r\n"

    elsif @output_method == "nph"
      # Not implemented: no headers are written in nph mode.
    elsif @output_method == "mod_ruby"
      r = Apache.request

      if ((@status == nil or @status == 200) and !@header.has_key?("content-type") and !@header.has_key?("location"))
        header("content-type: text/html")
      end

      if @status != nil
        r.status_line = "#{@status} #{@reasonPhrase}"
      end

      # NOTE(review): this restores the headers_out assignment that was
      # commented out in favour of a debugging puts (which re-entered
      # sendHeaders through print), and sends the header block afterwards.
      # Verify against a real mod_ruby installation.
      @header.each do |key, value|
        r.headers_out[canonical_header_name(key)] = value
      end
      r.send_http_header
    end
    @output_started = true
  end

  # Returns the default reason phrase for a status code.
  # Raises RuntimeError for codes missing from REASON_PHRASES.
  def getReasonPhrase(status)
    REASON_PHRASES.fetch(status) do
      raise "Unknown Statuscode. See http://www.w3.org/Protocols/rfc2616/rfc2616-sec6.html#sec6.1 for more information."
    end
  end

  private

  # Decodes one "key=value" argument and stores it in target (@get or @post),
  # expanding index suffixes such as name[0], name['text'] or name[] into
  # nested Hashes. Arguments without a key are ignored.
  def store_param(target, arg)
    arg_key, arg_val = arg.split(/=/, 2)
    arg_key = Rweb::unescape(arg_key)
    arg_val = Rweb::unescape(arg_val)
    return if arg_key.nil? || arg_key.empty?

    # Peel the index suffixes off from right to left.
    keys = []
    while match = PARAM_INDEX_PATTERN.match(arg_key)
      arg_key = match[1]
      keys.unshift(match[2])
    end
    keys.unshift(arg_key)

    node = target
    parent = nil
    parent_key = nil
    keys.each do |key|
      if key == ""
        # No key specified (like in "test[]"), so we use the lowest unused
        # Integer as key
        key = 0
        key += 1 while node.has_key?(key)
      elsif /\A[0-9]+\z/ =~ key
        # If the index is numerical convert it to an Integer
        key = key.to_i
      elsif key[0].chr == "'" || key[0].chr == '"'
        key = key[1, key.length - 2]
      end
      # Descend through a Hash, replacing a missing or scalar entry.
      node[key] = {} unless node[key].is_a?(Hash)
      parent = node
      parent_key = key
      node = node[key]
    end
    parent[parent_key] = arg_val
  end

  # Stores a single header line (without line breaks), or forwards it to
  # status when it is a status line. Empty lines are ignored.
  def set_header_line(line)
    return if line.empty?

    if line =~ /\AHTTP\/1\.[01] [0-9]{3}/ || line =~ /\Astatus: [0-9]{3}/i
      status(line)
    else
      name, value = line.split(/: ?/, 2)
      @header[name.downcase] = value
    end
  end

  # Converts a lower-case header name like "content-type" to "Content-Type".
  def canonical_header_name(key)
    key.gsub(/(\A|-)[a-z]/) { |m| m.upcase }
  end

  # Escapes &, <, > and " for embedding text in HTML.
  def escape_html(text)
    text.to_s.gsub(/[&<>"]/) { |c| HTML_ESCAPES[c] }
  end
end
+
+class Cookie
+ attr_reader :name, :value, :maxage, :path, :domain, :secure, :comment
+
+ # Sets a cookie. Please see below for details of the attributes.
def initialize(name, value = nil, maxage = nil, path = nil, domain = nil, secure = false)
  # HTTP headers (cookies are an HTTP header) can only be set while no
  # content has been sent, so creating a cookie after @@allowed was switched
  # to false raises an exception.
  @@allowed = true unless defined?(@@allowed)
  unless @@allowed
    raise "You can't set cookies after the HTTP headers are send."
  end

  @@list = [] unless defined?(@@list)
  @@type = "netscape" unless defined?(@@type)

  unless name.is_a?(String)
    raise TypeError, "The name of a cookie must be a string", caller
  end
  # is_a? is used instead of comparing class.superclass with Integer, which
  # only holds when integers are instances of a direct Integer subclass.
  if value.is_a?(Integer) || value.is_a?(Float)
    value = value.to_s
  elsif !value.nil? && !value.is_a?(String)
    raise TypeError, "The value of a cookie must be a string, integer, float or nil", caller
  end
  if maxage.is_a?(Time)
    # Store the lifetime in seconds from now.
    maxage = maxage - Time.now
  elsif !(maxage.nil? || maxage.is_a?(Numeric))
    raise TypeError, "The maxage date of a cookie must be an Integer or Time object or nil.", caller
  end
  unless path.nil? || path.is_a?(String)
    raise TypeError, "The path of a cookie must be nil or a string", caller
  end
  unless domain.nil? || domain.is_a?(String)
    raise TypeError, "The domain of a cookie must be a String or nil.", caller
  end
  unless secure == true || secure == false
    raise TypeError, "The secure field of a cookie must be true or false", caller
  end

  @name, @value, @maxage, @path, @domain, @secure = name, value, maxage, path, domain, secure
  @comment = nil

  # Register only after validation, so a rejected cookie is never listed.
  @@list << self
end
+
+ # Modifies the value of this cookie. The information you want to store. If the
+ # value is nil, the cookie will be deleted by the client.
+ #
+ # This attribute can be a String, Integer or Float object or nil.
def value=(value)
  # Numbers are stored in their String form; nil marks the cookie for
  # deletion. is_a? is used instead of comparing class.superclass with
  # Integer, which only holds when integers are instances of a direct
  # Integer subclass.
  if value.is_a?(Integer) || value.is_a?(Float)
    value = value.to_s
  elsif !value.nil? && !value.is_a?(String)
    raise TypeError, "The value of a cookie must be a string, integer, float or nil", caller
  end
  @value = value
end
+
+ # Modifies the maxage of this cookie. This attribute defines the lifetime of
+ # the cookie, in seconds. A value of 0 means the cookie should be discarded
+ # imediatly. If it set to nil, the cookie will be deleted when the browser
+ # will be closed.
+ #
+ # Attention: This is different from other implementations like PHP, where you
+ # gives the seconds since 1/1/1970 0:00:00 GMT.
+ #
+ # This attribute must be an Integer or Time object or nil.
def maxage=(maxage)
  if maxage.is_a?(Time)
    # A point in time is converted to a lifetime in seconds from now.
    maxage = maxage - Time.now
  elsif !(maxage.nil? || maxage.is_a?(Numeric))
    raise TypeError, "The maxage of a cookie must be an Integer or Time object or nil.", caller
  end
  @maxage = maxage
end
+
+ # Modifies the path value of this cookie. The client will send this cookie
+ # only, if the requested document is this directory or a subdirectory of it.
+ #
+ # The value of the attribute must be a String object or nil.
def path=(path)
  # Only nil or an exact String instance is accepted.
  acceptable = path == nil || path.class == String
  raise TypeError, "The path of a cookie must be nil or a string", caller unless acceptable

  @path = path
end
+
+ # Modifies the domain value of this cookie. The client will send this cookie
+ # only if it's connected with this domain (or a subdomain, if the first
+ # character is a dot like in ".ruby-lang.org")
+ #
+ # The value of this attribute must be a String or nil.
def domain=(domain)
  # Only nil or an exact String instance is accepted.
  acceptable = domain == nil || domain.class == String
  raise TypeError, "The domain of a cookie must be a String or nil.", caller unless acceptable

  @domain = domain
end
+
+ # Modifies the secure flag of this cookie. If it's true, the client will only
+ # send this cookie over a secure connection.
+ #
+ # The value of this attribute has to be true or false.
def secure=(secure)
  # {{{
  # The flag has to be one of the two boolean objects; nil is not accepted.
  is_boolean = (secure == true) || (secure == false)
  if !is_boolean
    raise TypeError, "The secure field of a cookie must be true or false", caller
  end
  @secure = secure
  # }}}
end
+
+ # Modifies the comment value of this cookie. The comment won't be sent if
+ # type is "netscape".
def comment=(comment)
  # {{{
  # Guard first, then store: the comment is either exactly a String or nil.
  raise TypeError, "The comment of a cookie must be a string or nil", caller unless comment.class == String || comment == nil
  @comment = comment
  # }}}
end
+
+ # Changes the type of all cookies.
+ # Allowed values are RFC2109 and netscape (default).
def Cookie.type=(type)
  # Selects the header format used for all cookies.
  #
  # type:: "netscape" or "RFC2109", compared case-insensitively.
  # Raises RuntimeError when cookies may no longer be changed (@@allowed is
  # false) or when the type is neither of the two names.
  #
  # The previous check joined both comparisons with &&; no string equals
  # both names, so every call raised. The canonical spelling is stored
  # because getHttpHeader compares @@type against exactly "netscape".
  # {{{
  unless @@allowed
    raise "The cookies are allready send, so you can't change the type anymore."
  end
  case type.downcase
  when "netscape"
    @@type = "netscape"
  when "rfc2109"
    @@type = "RFC2109"
  else
    raise "The type of the cookies must be \"RFC2109\" or \"netscape\"."
  end
  # }}}
end
+
+ # After sending this message, no cookies can be set or modified. Use it when
+ # the HTTP headers have been sent. Rweb does this for you.
def Cookie.disallow
  # {{{
  # Clears the class-wide @@allowed flag and reports success with true.
  @@allowed = false
  return true
  # }}}
end
+
+ # Returns a HTTP header (type String) with all cookies. Rweb does this for
+ # you.
def Cookie.getHttpHeader
  # Builds the Set-Cookie header text for every cookie in @@list, in the
  # format selected by @@type. Returns false when @@list is not defined.
  #
  # Side effect: a cookie whose value is nil is rewritten in place to
  # value "" and maxage 0 before it is emitted.
  #
  # Netscape format: one "Set-Cookie: ..." line per cookie, each ending in
  # CRLF. RFC2109 format: a single "Set-Cookie: " line with the cookies
  # separated by commas and no trailing CRLF.
  # {{{
  return false unless defined?(@@list)

  if @@type == "netscape"
    str = ""
    @@list.each do |cookie|
      if cookie.value == nil
        cookie.maxage = 0
        cookie.value = ""
      end
      # TODO: Name and value should be escaped!
      str << "Set-Cookie: #{cookie.name}=#{cookie.value}"
      unless cookie.maxage == nil
        expire = (Time.now + cookie.maxage).gmtime
        # The attribute is called "Expires" (the old code wrote "Expire"),
        # and the zone must read GMT; %Z would print "UTC" for a gmtime.
        str << "; Expires=#{expire.strftime("%a, %d-%b-%Y %H:%M:%S GMT")}"
      end
      str << "; Domain=#{cookie.domain}" unless cookie.domain == nil
      str << "; Path=#{cookie.path}" unless cookie.path == nil
      str << "; Secure" if cookie.secure
      str << "\r\n"
    end
    return str
  else # type == "RFC2109"
    str = "Set-Cookie: "
    comma = false

    @@list.each do |cookie|
      if cookie.value == nil
        cookie.maxage = 0
        cookie.value = ""
      end
      str << "," if comma
      comma = true

      str << "#{cookie.name}=\"#{cookie.value}\""
      str << "; Max-Age=\"#{cookie.maxage}\"" unless cookie.maxage == nil
      str << "; Domain=\"#{cookie.domain}\"" unless cookie.domain == nil
      str << "; Path=\"#{cookie.path}\"" unless cookie.path == nil
      str << "; Secure" if cookie.secure
      str << "; Comment=\"#{cookie.comment}\"" unless cookie.comment == nil
      str << "; Version=\"1\""
    end
    str
  end
  # }}}
end
+end
+
+require 'strscan'
+
+module BBCode
+ DEBUG = true
+
+ use 'encoder', 'tags', 'tagstack', 'smileys'
+
+=begin
+ The Parser class takes care of the encoding.
+ It scans the given BBCode (as plain text), finds tags
+ and smilies and also makes links of urls in text.
+
+ Normal text is sent directly to the encoder.
+
+ If a tag was found, an instance of a Tag subclass is created
+ to handle the case.
+
+ The @tagstack manages tag nesting and ensures valid HTML.
+=end
+
+ class Parser
class Attribute
  # The attribute part of a BBCode tag (the text after the tag name),
  # parsed into a Hash: "key=value" pairs are stored under their key, a
  # leading "=value" under the empty key '', and bare words are collected
  # in the Array stored under :flags.
  #
  # Instances are obtained through Attribute.create (new is private);
  # every empty attribute string yields the shared @@empty_attr object.

  # flatten and use only one empty_arg
  def self.create attr
    attr = flatten attr
    return @@empty_attr if attr.empty?
    new attr
  end

  private_class_method :new

  # remove leading and trailing whitespace; concat lines
  def self.flatten attr
    attr.strip.gsub(/\n/, ' ')
    # -> ^ and $ can only match at begin and end now
  end

  ATTRIBUTE_SCAN = /
    (?!$)  # don't match at end
    \s*
    ( # $1 = key
      [^=\s\]"\\]*
      (?:
        (?: \\. | "[^"\\]*(?:\\.[^"\\]*)*"? )
        [^=\s\]"\\]*
      )*
    )
    (?:
      =
      ( # $2 = value
        [^\s\]"\\]*
        (?:
          (?: \\. | "[^"\\]*(?:\\.[^"\\]*)*"? )
          [^\s\]"\\]*
        )*
      )?
    )?
    \s*
  /x

  # Parses an attribute string.
  #
  # Returns [empty_tag, attr]: empty_tag is true when the tag ended in a
  # separate slash ([... /]), attr is the Hash described on the class.
  def self.parse source
    source = source.dup
    # [tag:value] is accepted as another spelling of [tag=value]
    source.sub!(/^:/, '=')
    # empty_tag: the tag looks like [... /]
    # slice!: this deletes the \s*/ at the end
    # The slash only counts when it stands alone or follows whitespace,
    # because [url=http://rubybb.org/forum/] is NOT an empty tag.
    # In RubyBBCode, you can use [url=http://rubybb.org/forum/ /], and this has to be
    # interpreted correctly.
    #
    # The old `empty_tag = source.sub!(...) or source.slice!(...)` assigned
    # the result of sub! (`or` binds looser than `=`), so the flag never
    # reflected the slash, and the bare /\/$/ also ate the last slash of URLs.
    empty_tag = !source.slice!(/(?:^|\s+)\/$/).nil?
    debug 'PARSE: ' + source.inspect + ' => ' + empty_tag.inspect
    #-> we have now an attr that's EITHER empty OR begins and ends with non-whitespace.

    attr = Hash.new
    attr[:flags] = []
    source.scan(ATTRIBUTE_SCAN) { |key, value|
      if not value
        attr[:flags] << unescape(key)
      else
        next if value.empty? and key.empty?
        attr[unescape(key)] = unescape(value)
      end
    }
    debug attr.inspect

    return empty_tag, attr
  end

  # Returns the character following the backslash of a two-character
  # escape. esc[1, 1] is used because it is a String on every Ruby
  # version (esc[1] used to be a character code on old interpreters).
  def self.unescape_char esc
    esc[1, 1]
  end

  # Strips the surrounding quotes of a quoted string (the closing quote
  # may be missing) and resolves the backslash escapes inside.
  def self.unquote qt
    qt[1..-1].chomp('"').gsub(/\\./) { |esc| unescape_char esc }
  end

  # Resolves backslash escapes and quoted sections in a key or value.
  def self.unescape str
    str.gsub(/ (\\.) | (" [^"\\]* (?:\\.[^"\\]*)* "?) /x) {
      if $1
        unescape_char $1
      else
        unquote $2
      end
    }
  end

  include Enumerable
  # Yields every key/value pair of the parsed attributes (including the
  # :flags entry). This used to iterate @args, which is never assigned.
  def each &block
    @attr.each(&block)
  end

  # NOTE(review): nothing in this class assigns @args, so the args reader
  # always returns nil; it is kept for interface compatibility.
  attr_reader :source, :args, :value

  def initialize source
    @source = source
    debug 'Attribute#new(%p)' % source
    @empty_tag, @attr = Attribute.parse source
    # the value given directly after the tag name: [tag=value]
    @value = @attr[''].to_s
  end

  def empty?
    self == @@empty_attr
  end

  def empty_tag?
    @empty_tag
  end

  # Looks up a parsed attribute by key.
  def [] *keys
    @attr[*keys]
  end

  # The bare words of the attribute string (was `attr[:flags]`, a call to
  # a method that does not exist).
  def flags
    @attr[:flags]
  end

  # NOTE(review): returns the attribute Hash, not a String - confirm what
  # callers expect before changing it.
  def to_s
    @attr
  end

  def inspect
    'ATTR[' + @attr.inspect + (@empty_tag ? ' | empty tag' : '') + ']'
  end
end
+ class Attribute
+ @@empty_attr = new ''
+ end
+ end
+
+ class Parser
def Parser.flatten str
  # Normalise line endings to unix style: first the two-character dos
  # form (\r\n), then every carriage return that is still left (mac).
  str.gsub("\r\n", "\n").tr("\r", "\n")
end
+
+ def initialize input = ''
+ # input manager
+ @scanner = StringScanner.new ''
+ # output manager
+ @encoder = Encoder.new
+ @output = ''
+ # tag manager
+ @tagstack = TagStack.new(@encoder)
+
+ @do_magic = true
+ # set the input
+ feed input
+ end
+
+ # if you want, you can feed a parser instance after creating,
+ # or even feed it repeatedly.
def feed food
  # Hand the newline-normalised input to the scanner.
  normalized = Parser.flatten(food)
  @scanner.string = normalized
end
+
+ # parse through the string using parse_token
def parse
  # Consume the input one token at a time, close whatever tags are still
  # open, then run the encoder output through parse_magic and keep it.
  until @scanner.eos?
    parse_token
  end
  @tagstack.close_all
  @output = parse_magic(@encoder.output)
end
+
+ def output
+ @output
+ end
+
+ # ok, internals start here
+ private
+ # the default output functions. everything should use them or the tags.
+ def add_text text = @scanner.matched
+ @encoder.add_text text
+ end
+
+ # use this carefully
+ def add_html html
+ @encoder.add_html html
+ end
+
+ # highlights the text as error
def add_garbage garbage
  # Outside of DEBUG mode the garbage is emitted as plain text only.
  unless DEBUG
    add_text garbage
    return nil
  end
  # In DEBUG mode it is wrapped in an error span.
  add_html '<span class="error">'
  add_text garbage
  add_html '</span>'
end
+
+ # unknown and incorrectly nested tags are ignored and
+ # sent as plaintext (garbage in - garbage out).
+ # in debug mode, garbage is marked with lime background.
def garbage_out start
  # Rewind to where the failed tag began and emit just its first
  # character as garbage; scanning resumes right behind it.
  @scanner.pos = start
  junk = @scanner.scan(/./m)
  debug 'GARBAGE: ' + junk
  add_garbage(junk)
end
+
+ # simple text; everything but [, \[ allowed
+ SIMPLE_TEXT_SCAN_ = /
+ [^\[\\]* # normal*
+ (?: # (
+ \\.? # special
+ [^\[\\]* # normal*
+ )* # )*
+ /mx
+ SIMPLE_TEXT_SCAN = /[^\[]+/
+
+=begin
+
+ WHAT IS A TAG?
+ ==============
+
+ Tags in BBCode can be much more than just a simple [b].
+ I use many terms here to differ the parts of each tag.
+
+ Basic scheme:
+ [ code ]
+ TAG START TAG INFO TAG END
+
+ Most tags need a second tag to close the range it opened.
+ This is done with CLOSING TAGS:
+ [/code]
+ or by using empty tags that have no content and close themselves:
+ [url=winamp.com /]
+ You surely know this from HTML.
+ These slashes define the TAG KIND = normal|closing|empty and
+ cannot be used together.
+
+ Everything between [ and ] and excluding the slashes is called the
+ TAG INFO. This info may contain:
+ - TAG ID
+ - TAG NAME including the tag id
+ - attributes
+
+ The TAG ID is the first char of the info:
+
+ TAG | ID
+ ----------+----
+ [quote] | q
+ [&plusmn;] | &
+ ["[b]"] | "
+ [/url] | u
+ [---] | -
+
+ As you can see, the tag id shows the TAG TYPE, it can be a
+ normal tag, a formatting tag or an entity.
+ Therefore, the parser first scans the id to decide how to go
+ on with parsing.
+=end
+ # tag
+ # TODO more complex expression allowing
+ # [quote="[ladico]"] and [quote=\[ladico\]] to be correct tags
+ TAG_BEGIN_SCAN = /
+ \[ # tag start
+ ( \/ )? # $1 = closing tag?
+ ( [^\]] ) # $2 = tag id
+ /x
+ TAG_END_SCAN = /
+ [^\]]* # rest that was not handled
+ \]? # tag end
+ /x
+ CLOSE_TAG_SCAN = /
+ ( [^\]]* ) # $1 = the rest of the tag info
+ ( \/ )? # $2 = empty tag?
+ \]? # tag end
+ /x
+ UNCLOSED_TAG_SCAN = / \[ /x
+
+ CLASSIC_TAG_SCAN = / [a-z]* /ix
+
+ SEPARATOR_TAG_SCAN = / \** /x
+
+ FORMAT_TAG_SCAN = / -- -* /x
+
+ QUOTED_SCAN = /
+ ( # $1 = quoted text
+ [^"\\]* # normal*
+ (?: # (
+ \\. # special
+ [^"\\]* # normal*
+ )* # )*
+ )
+ "? # end quote "
+ /mx
+
+ ENTITY_SCAN = /
+ ( [^;\]]+ ) # $1 = entity code
+ ;? # optional ending semicolon
+ /ix
+
+ SMILEY_SCAN = Smileys::SMILEY_PATTERN
+
+ # this is the main parser loop that separates
+ # text - everything until "["
+ # from
+ # tags - starting with "[", ending with "]"
def parse_token
  # A run of plain text is emitted directly; anything else is handed to
  # the tag handler.
  @scanner.scan(SIMPLE_TEXT_SCAN) ? add_text : handle_tag
end
+
def handle_tag
  # Tries to read one tag at the current scanner position and dispatches
  # on its first character (the tag id). Whatever cannot be handled is
  # passed to garbage_out, starting at the position where the tag began.
  start_pos = @scanner.pos

  if @scanner.scan(TAG_BEGIN_SCAN).nil?
    garbage_out start_pos
    return
  end

  is_closing = @scanner[1]
  tag_id = @scanner[2]
  recognized = false
  # set when the handler has already consumed the rest of the tag
  end_consumed = false

  case tag_id

  when /[a-z]/i
    if @scanner.scan(CLASSIC_TAG_SCAN) &&
        handle_classic_tag(tag_id + @scanner.matched, is_closing)
      recognized = true
      end_consumed = true
    end

  when '*'
    if @scanner.scan(SEPARATOR_TAG_SCAN)
      handle_asterisk start_pos, tag_id + @scanner.matched
      recognized = true
    end

  when '-'
    if @scanner.scan(FORMAT_TAG_SCAN)
      @encoder.add_html "\n<hr>\n"
      recognized = true
    end

  when '"'
    if @scanner.scan(QUOTED_SCAN)
      @encoder.add_text unescape(@scanner[1])
      recognized = true
    end

  when '&'
    if @scanner.scan(ENTITY_SCAN)
      @encoder.add_entity @scanner[1]
      recognized = true
    end

  when Smileys::SMILEY_START_CHARSET
    # step back one character so the smiley pattern sees the tag id too
    @scanner.pos -= 1
    if @scanner.scan(SMILEY_SCAN)
      @encoder.add_html Smileys.smiley_to_image(@scanner.matched)
      recognized = true
    end

  end

  return garbage_out(start_pos) unless recognized

  @scanner.scan(TAG_END_SCAN) unless end_consumed
end
+
+ ATTRIBUTES_SCAN = /
+ (
+ [^\]"\\]*
+ (?:
+ (?:
+ \\.
+ |
+ "
+ [^"\\]*
+ (?:
+ \\.
+ [^"\\]*
+ )*
+ "?
+ )
+ [^\]"\\]*
+ )*
+ )
+ \]?
+ /x
+
def handle_classic_tag name, closing
  # Handles a tag whose name consists of letters.
  #
  # name::    the tag name as scanned; it is downcased in place
  # closing:: truthy when the tag was written with a leading slash
  #
  # Returns true when the tag was opened or closed, nil when the name is
  # not in TAG_LIST or the tag stack accepts neither operation.
  debug 'TAG: ' + (closing ? '/' : '') + name
  name.downcase!
  klass = TAG_LIST[name]
  return unless klass

  # create an attribute object to handle the rest of the tag
  @scanner.scan(ATTRIBUTES_SCAN)
  attributes = Attribute.create(@scanner[1])

  # OPEN (never attempted for a closing tag)
  opened = closing ? nil : @tagstack.try_open_class(klass, attributes)
  if opened
    opened.do_open @scanner
    # this should be done by the tag itself.
    if attributes.empty_tag?
      opened.handle_empty
      @tagstack.close_tag
    elsif opened.special_content?
      handle_special_content(opened)
      @tagstack.close_tag
    end
    return true
  end

  # CLOSE
  return true if @tagstack.try_close_class(klass)

  # GARBAGE
  nil
end
+
def handle_asterisk tag_start, stars
  # rule for asterisk tags: they belong to the last tag
  # that handles them. tags opened after this tag are closed.
  # if no open tag uses them, all are closed.
  owner = @tagstack.close_all_until { |open_tag| open_tag.respond_to? :asterisk }
  accepted = owner && owner.asterisk(stars, @scanner)
  garbage_out tag_start unless accepted
end
+
def handle_special_content tag
  # Reads everything up to and including the tag's closing pattern and
  # passes the text in front of that pattern to the tag. Without a
  # closing pattern the whole rest of the input is the content.
  content = @scanner.scan_until(tag.closing_tag)
  if content.nil?
    content = @scanner.scan(/.*/m).to_s
  else
    # drop the closing tag itself from the end
    content = content[0...-(@scanner.matched.size)]
  end
  tag.handle_content(content)
end
+
def unescape text
  # input: correctly formatted quoted string (without the quotes)
  # \" and \\ lose their backslash; every other escape is left as it is.
  text.gsub(/\\(?:(["\\])|.)/) do |escape|
    Regexp.last_match(1) || escape
  end
end
+
+
+ # MAGIC FEATURES
+
+ URL_PATTERN = /(?:(?:www|ftp)\.|(?>\w{3,}):\/\/)\S+/
+ EMAIL_PATTERN = /(?>[\w\-_.]+)@[\w\-\.]+\.\w+/
+
+ HAS_MAGIC = /[&@#{Smileys::SMILEY_START_CHARS}]|(?i:www|ftp)/
+
+ MAGIC_PATTERN = Regexp.new('(\W|^)(%s)' %
+ [Smileys::MAGIC_SMILEY_PATTERN, URL_PATTERN, EMAIL_PATTERN].map { |pattern|
+ pattern.to_s
+ }.join('|') )
+
+ IS_SMILEY_PATTERN = Regexp.new('^%s' % Smileys::SMILEY_START_CHARSET.to_s )
+ IS_URL_PATTERN = /^(?:(?i:www|ftp)\.|(?>\w+):\/\/)/
+ URL_STARTS_WITH_PROTOCOL = /^\w+:\/\//
+ IS_EMAIL_PATTERN = /^[\w\-_.]+@/
+
def to_magic text
  # Replaces every smiley, URL and e-mail address found by MAGIC_PATTERN
  # in +text+ with its HTML form. +text+ is modified in place and returned.
  #
  # Trailing punctuation is kept outside of the generated link. Raises
  # RuntimeError if a match fits none of the three IS_*_PATTERN checks.
  text.gsub!(MAGIC_PATTERN) do
    prefix = $1
    magic = $2
    replacement =
      case magic
      when IS_SMILEY_PATTERN
        # NOTE(review): handle_tag calls Smileys.smiley_to_image while this
        # line calls smiley_to_img - confirm which name Smileys defines.
        Smileys.smiley_to_img magic
      when IS_URL_PATTERN
        last = magic.slice_punctation! # no punctation in my URL
        # Work on a copy: `href = magic` made both names refer to the same
        # String, so the added protocol also showed up in the link text.
        href = magic.dup
        href.insert(0, 'http://') unless magic =~ URL_STARTS_WITH_PROTOCOL
        '<a href="' + href + '">' + magic + '</a>' + last
      when IS_EMAIL_PATTERN
        last = magic.slice_punctation!
        '<a href="mailto:' + magic + '">' + magic + '</a>' + last
      else
        raise '{{{' + magic + '}}}'
      end
    prefix + replacement
  end
  text
end
+
+ # handles smileys and urls
def parse_magic html
  # handles smileys and urls
  # Markup (single tags, complete <a ...>...</a> and <pre...>...</pre>
  # elements) is copied untouched; the text between it goes through
  # to_magic. With @do_magic switched off the input is returned as is.
  return html unless @do_magic
  scanner = StringScanner.new html
  result = ''
  until scanner.eos?
    if (markup = scanner.scan(/ < (?: a\s .*? <\/a> | pre\W .*? <\/pre> | [^>]* > ) /mx))
      result << markup
    elsif (plain = scanner.scan(/ [^<]+ /x))
      result << to_magic(plain)

    # this should never happen
    elsif scanner.scan(/./m)
      raise 'ERROR: else case reached'
    end
  end
  result
end
+ end # Parser
+end
+
class String
  # Cuts a trailing run of the characters . : , ! ? off the receiver (in
  # place) and returns the removed part, or '' when nothing was removed.
  def slice_punctation!
    removed = slice!(/[.:,!\?]+$/)
    removed.to_s # '' instead of nil
  end
end
+
+#
+# = Grammar
+#
+# An implementation of common algorithms on grammars.
+#
+# This is used by Shinobu, a visualization tool for educating compiler-building.
+#
+# Thanks to Andreas Kunert for his wonderful LR(k) Pamphlet (German, see http://www.informatik.hu-berlin.de/~kunert/papers/lr-analyse), and Aho/Sethi/Ullman for their Dragon Book.
+#
+# Homepage:: http://shinobu.cYcnus.de (not existing yet)
+# Author:: murphy (Kornelius Kalnbach)
+# Copyright:: (cc) 2005 cYcnus
+# License:: GPL
+# Version:: 0.2.0 (2005-03-27)
+
+require 'set_hash'
+require 'ctype'
+require 'tools'
+require 'rules'
+require 'trace'
+
+require 'first'
+require 'follow'
+
+# = Grammar
+#
+# == Syntax
+#
+# === Rules
+#
+# Each line is a rule.
+# The syntax is
+#
+# left - right
+#
+# where +left+ and +right+ can be uppercase and lowercase letters,
+# and <code>-</code> can be any combination of <, >, - or whitespace.
+#
+# === Symbols
+#
+# Uppercase letters stand for meta symbols, lowercase for terminals.
+#
+# You can make epsilon-derivations by leaving <code><right></code> empty.
+#
+# === Example
+# S - Ac
+# A - Sc
+# A - b
+# A -
class Grammar

  attr_reader :tracer
  # Creates a new Grammar.
  #
  # data::   the rules, in the syntax described for this class
  # tracer:: stored in @tracer and exposed through the tracer reader
  def initialize data, tracer = Tracer.new
    @tracer = tracer
    @rules = Rules.new
    @terminals, @meta_symbols = SortedSet.new, Array.new
    @start_symbol = nil
    add_rules data
  end

  attr_reader :meta_symbols, :terminals, :rules, :start_symbol

  alias_method :sigma, :terminals
  alias_method :alphabet, :terminals
  alias_method :variables, :meta_symbols
  alias_method :nonterminals, :meta_symbols

  # A string representation of the grammar for debugging.
  # With +productions_too+ the rules themselves are listed instead of
  # just their number.
  def inspect productions_too = false
    productions = productions_too ? @rules.inspect : @rules.size
    'Grammar(meta symbols: %s; alphabet: %s; productions: [%s]; start symbol: %s)' %
      [
        meta_symbols.join(', '),
        terminals.join(', '),
        productions,
        start_symbol
      ]
  end

  # Add rules to the grammar. +rules+ should be a String or respond to +scan+ in a similar way.
  #
  # The left side of the first rule becomes the start symbol unless one is
  # already set. The cached FIRST and FOLLOW operators are discarded.
  #
  # Syntax: see Grammar.
  def add_rules grammar
    @rules = Rules.parse grammar do |rule|
      @start_symbol ||= rule.left
      @meta_symbols << rule.left
      @terminals.merge rule.right.split('').select { |s| terminal? s }
    end
    @meta_symbols.uniq!
    update
  end

  # Returns a hash acting as FIRST operator, so that
  # <code>first["ABC"]</code> is FIRST(ABC).
  # See http://en.wikipedia.org/wiki/LL_parser "Constructing an LL(1) parsing table" for details.
  def first
    first_operator
  end

  # Returns a hash acting as FOLLOW operator, so that
  # <code>follow["A"]</code> is FOLLOW(A).
  # See http://en.wikipedia.org/wiki/LL_parser "Constructing an LL(1) parsing table" for details.
  def follow
    follow_operator
  end

  # Raised (and rescued) inside ll1?. Derived from StandardError rather
  # than Exception so that an ordinary `rescue` clause can catch them.
  LLError = Class.new(StandardError)
  LLErrorType1 = Class.new(LLError)
  LLErrorType2 = Class.new(LLError)

  # Tests if the grammar is LL(1).
  #
  # For every meta symbol two conditions are checked:
  # 1. the FIRST sets of its alternatives are pairwise disjoint;
  # 2. if FIRST of the symbol contains EPSILON, it is disjoint from
  #    FOLLOW of the symbol.
  def ll1?
    @meta_symbols.each do |meta|
      first_sets = @rules[meta].map { |alpha| first[alpha] }
      first_sets.inject(Set[]) do |already_used, another_first_set|
        raise LLErrorType1 unless already_used.disjoint? another_first_set
        already_used.merge another_first_set
      end

      if first[meta].include?(EPSILON) && !first[meta].disjoint?(follow[meta])
        raise LLErrorType2
      end
    end
    true
  rescue LLError
    false
  end

private

  # The FIRST operator is created on first use and cached.
  def first_operator
    @first ||= FirstOperator.new self
  end

  # The FOLLOW operator is created on first use and cached.
  def follow_operator
    @follow ||= FollowOperator.new self
  end

  # Drops the cached operators; called whenever the rules change.
  def update
    @first = @follow = nil
  end

end
+
+if $0 == __FILE__
+ eval DATA.read, nil, $0, __LINE__+4
+end
+
+require 'test/unit'
+
+class TestCaseGrammar < Test::Unit::TestCase
+
+ include Grammar::Symbols
+
+ def fifo s
+ Set[*s.split('')]
+ end
+
+ def test_fifo
+ assert_equal Set[], fifo('')
+ assert_equal Set[EPSILON, END_OF_INPUT, 'x', 'Y'], fifo('?xY$')
+ end
+
+ TEST_GRAMMAR_1 = <<-EOG
+S - ABCD
+A - a
+A -
+B - b
+B -
+C - c
+C -
+D - S
+D -
+ EOG
+
+ def test_symbols
+ assert EPSILON
+ assert END_OF_INPUT
+ end
+
+ def test_first_1
+ g = Grammar.new TEST_GRAMMAR_1
+
+ f = nil
+ assert_nothing_raised { f = g.first }
+ assert_equal(Set['a', EPSILON], f['A'])
+ assert_equal(Set['b', EPSILON], f['B'])
+ assert_equal(Set['c', EPSILON], f['C'])
+ assert_equal(Set['a', 'b', 'c', EPSILON], f['D'])
+ assert_equal(f['D'], f['S'])
+ end
+
+ def test_follow_1
+ g = Grammar.new TEST_GRAMMAR_1
+
+ f = nil
+ assert_nothing_raised { f = g.follow }
+ assert_equal(Set['a', 'b', 'c', END_OF_INPUT], f['A'])
+ assert_equal(Set['a', 'b', 'c', END_OF_INPUT], f['B'])
+ assert_equal(Set['a', 'b', 'c', END_OF_INPUT], f['C'])
+ assert_equal(Set[END_OF_INPUT], f['D'])
+ assert_equal(Set[END_OF_INPUT], f['S'])
+ end
+
+
+ TEST_GRAMMAR_2 = <<-EOG
+S - Ed
+E - EpT
+E - EmT
+E - T
+T - TuF
+T - TdF
+T - F
+F - i
+F - n
+F - aEz
+ EOG
+
+ def test_first_2
+ g = Grammar.new TEST_GRAMMAR_2
+
+ f = nil
+ assert_nothing_raised { f = g.first }
+ assert_equal(Set['a', 'n', 'i'], f['E'])
+ assert_equal(Set['a', 'n', 'i'], f['F'])
+ assert_equal(Set['a', 'n', 'i'], f['T'])
+ assert_equal(Set['a', 'n', 'i'], f['S'])
+ end
+
+ def test_follow_2
+ g = Grammar.new TEST_GRAMMAR_2
+
+ f = nil
+ assert_nothing_raised { f = g.follow }
+ assert_equal(Set['m', 'd', 'z', 'p'], f['E'])
+ assert_equal(Set['m', 'd', 'z', 'p', 'u'], f['F'])
+ assert_equal(Set['m', 'd', 'z', 'p', 'u'], f['T'])
+ assert_equal(Set[END_OF_INPUT], f['S'])
+ end
+
+ LLError = Grammar::LLError
+
+ TEST_GRAMMAR_3 = <<-EOG
+E - TD
+D - pTD
+D -
+T - FS
+S - uFS
+S -
+S - p
+F - aEz
+F - i
+ EOG
+
+ NoError = Class.new(Exception)
+
def test_first_3
  g = Grammar.new TEST_GRAMMAR_3

  # Grammar 3 satisfies the FIRST condition of LL(1): for every meta
  # symbol the FIRST sets of its alternatives are pairwise disjoint.
  # (It is still not LL(1); test_follow_3 checks the FOLLOW condition.)
  f = nil
  assert_nothing_raised { f = g.first }
  assert_equal(Set['a', 'i'], f['E'])
  assert_equal(Set[EPSILON, 'p'], f['D'])
  assert_equal(Set['a', 'i'], f['F'])
  assert_equal(Set['a', 'i'], f['T'])
  assert_equal(Set[EPSILON, 'u', 'p'], f['S'])
  g.meta_symbols.each do |m|
    # Keep the FIRST sets in an Array, as test_first_3b does: turning
    # them into a Set merges identical sets and would hide exactly the
    # overlap this loop is looking for.
    firsts = g.rules[m].map { |x| f[x] }
    assert_nothing_raised do
      firsts.inject(Set.new) do |already_used, another_first_set|
        raise LLError, 'not disjoint!' unless already_used.disjoint? another_first_set
        already_used.merge another_first_set
      end
    end
  end
end
+
+ def test_follow_3
+ g = Grammar.new TEST_GRAMMAR_3
+
+ # Grammar 3 is not LL(1), because epsilon is in FIRST(S),
+ # but FIRST(S) and FOLLOW(S) are not disjoint.
+ f = nil
+ assert_nothing_raised { f = g.follow }
+ assert_equal(Set['z', END_OF_INPUT], f['E'])
+ assert_equal(Set['z', END_OF_INPUT], f['D'])
+ assert_equal(Set['z', 'p', 'u', END_OF_INPUT], f['F'])
+ assert_equal(Set['p', 'z', END_OF_INPUT], f['T'])
+ assert_equal(Set['p', 'z', END_OF_INPUT], f['S'])
+ for m in g.meta_symbols
+ first_m = g.first[m]
+ next unless first_m.include? EPSILON
+ assert_raise(m == 'S' ? LLError : NoError) do
+ if first_m.disjoint? f[m]
+ raise NoError # this is fun :D
+ else
+ raise LLError
+ end
+ end
+ end
+ end
+
+ TEST_GRAMMAR_3b = <<-EOG
+E - TD
+D - pTD
+D - PTD
+D -
+T - FS
+S - uFS
+S -
+F - aEz
+F - i
+P - p
+ EOG
+
+ def test_first_3b
+ g = Grammar.new TEST_GRAMMAR_3b
+
+ # Grammar 3b is NOT LL(1), since not all first-sets are disjoint.
+ f = nil
+ assert_nothing_raised { f = g.first }
+ assert_equal(Set['a', 'i'], f['E'])
+ assert_equal(Set[EPSILON, 'p'], f['D'])
+ assert_equal(Set['p'], f['P'])
+ assert_equal(Set['a', 'i'], f['F'])
+ assert_equal(Set['a', 'i'], f['T'])
+ assert_equal(Set[EPSILON, 'u'], f['S'])
+ for m in g.meta_symbols
+ r = g.rules[m]
+ firsts = r.map { |x| f[x] }
+ assert_raise(m == 'D' ? LLError : NoError) do
+ firsts.inject(Set.new) do |already_used, another_first_set|
+ raise LLError, 'not disjoint!' unless already_used.disjoint? another_first_set
+ already_used.merge another_first_set
+ end
+ raise NoError
+ end
+ end
+ end
+
+ def test_follow_3b
+ g = Grammar.new TEST_GRAMMAR_3b
+
+ # Although Grammar 3b is NOT LL(1), the FOLLOW-condition is satisfied.
+ f = nil
+ assert_nothing_raised { f = g.follow }
+ assert_equal(fifo('z$'), f['E'], 'E')
+ assert_equal(fifo('z$'), f['D'], 'D')
+ assert_equal(fifo('ai'), f['P'], 'P')
+ assert_equal(fifo('z$pu'), f['F'], 'F')
+ assert_equal(fifo('z$p'), f['T'], 'T')
+ assert_equal(fifo('z$p'), f['S'], 'S')
+ for m in g.meta_symbols
+ first_m = g.first[m]
+ next unless first_m.include? EPSILON
+ assert_raise(NoError) do
+ if first_m.disjoint? f[m]
+ raise NoError # this is fun :D
+ else
+ raise LLError
+ end
+ end
+ end
+ end
+
+ def test_ll1?
+ assert_equal false, Grammar.new(TEST_GRAMMAR_3).ll1?, 'Grammar 3'
+ assert_equal false, Grammar.new(TEST_GRAMMAR_3b).ll1?, 'Grammar 3b'
+ end
+
+ def test_new
+ assert_nothing_raised { Grammar.new '' }
+ assert_nothing_raised { Grammar.new TEST_GRAMMAR_1 }
+ assert_nothing_raised { Grammar.new TEST_GRAMMAR_2 }
+ assert_nothing_raised { Grammar.new TEST_GRAMMAR_3 }
+ assert_nothing_raised { Grammar.new TEST_GRAMMAR_1 + TEST_GRAMMAR_2 + TEST_GRAMMAR_3 }
+ assert_raise(ArgumentError) { Grammar.new 'S - ?' }
+ end
+end
+
+# vim:foldmethod=syntax
+
+#!/usr/bin/env ruby
+
+require 'fox12'
+
+include Fox
+
class Window < FXMainWindow
  # Main window of the FIRST set demo: a list with the productions, a
  # two-column result table and a text pane for messages.
  def initialize(app)
    super(app, app.appName + ": First Set Calculation", nil, nil, DECOR_ALL, 0, 0, 800, 600, 0, 0)

    # {{{ menubar
    bar = FXMenuBar.new(self, LAYOUT_SIDE_TOP|LAYOUT_FILL_X)

    file_pane = FXMenuPane.new(self)

    start_command = FXMenuCommand.new(file_pane, "&Start\tCtl-S\tStart the application.", nil, getApp())
    start_command.connect(SEL_COMMAND, method(:start))
    FXMenuCommand.new(file_pane, "&Quit\tAlt-F4\tQuit the application.", nil, getApp(), FXApp::ID_QUIT)
    FXMenuTitle.new(bar, "&File", nil, file_pane)
    # }}} menubar

    # {{{ statusbar
    @statusbar = FXStatusBar.new(self, LAYOUT_SIDE_BOTTOM|LAYOUT_FILL_X|STATUSBAR_WITH_DRAGCORNER)
    # }}} statusbar

    # {{{ window content
    splitter = FXSplitter.new(self, SPLITTER_VERTICAL|LAYOUT_SIDE_TOP|LAYOUT_FILL)

    @productions = FXList.new(splitter, nil, 0, LAYOUT_SIDE_TOP|LAYOUT_FILL_X|LAYOUT_FIX_HEIGHT|LIST_SINGLESELECT)
    @productions.height = 100

    @result = FXTable.new(splitter, nil, 0, LAYOUT_FILL)
    @result.height = 200
    @result.setTableSize(2, 2, false)
    @result.rowHeaderWidth = 0

    column_header = @result.columnHeader
    column_header.setItemText 0, 'X'
    column_header.setItemText 1, 'FIRST(X)'
    column_header.each do |item|
      item.justification = FXHeaderItem::CENTER_X
    end

    @debug = FXText.new(splitter, nil, 0, LAYOUT_SIDE_BOTTOM|LAYOUT_FILL_X|LAYOUT_FIX_HEIGHT)
    @debug.height = 200

    # }}} window content
  end

  # Builds the Grammar from +grammar+ (with a FirstTracer bound to this
  # window) and lists its rules, remembering each rule's list index.
  def load_grammar grammar
    @tracer = FirstTracer.new(self)
    @grammar = Grammar.new grammar, @tracer
    @rules_indexes = Hash.new
    @grammar.rules.each_with_index do |rule, index|
      @productions.appendItem rule.inspect
      @rules_indexes[rule] = index
    end
  end

  def create
    super
    show(PLACEMENT_SCREEN)
  end

  # Selects +rule+ in the production list, then pauses briefly.
  def rule rule
    position = @rules_indexes[rule]
    @productions.selectItem position
    sleep 0.1
  end

  # Shows +i+ as the window title, then pauses briefly.
  def iterate i
    setTitle i.to_s
    sleep 0.1
  end

  # Appends +what+ as a new line to the message pane, then pauses briefly.
  def missing what
    @debug.appendText what + "\n"
    sleep 0.1
  end

  # Menu handler: computes the FIRST operator in a new thread; an error
  # is written to the message pane together with its backtrace.
  def start sender, sel, pointer
    Thread.new do
      begin
        @grammar.first
      rescue => boom
        report = [boom.to_s, *boom.backtrace]
        @debug.appendText report.join("\n")
      end
    end
  end

end
+
+$: << 'grammar'
+require 'grammar'
+
+require 'first_tracer'
+
app = FXApp.new("Shinobu", "cYcnus")

# create the window
window = Window.new app

# The grammar comes from the file named by the first argument; without
# arguments a built-in example is used.
grammar =
  if ARGV.empty?
    <<-EOG1
Z --> S
S --> Sb
S --> bAa
A --> aSc
A --> a
A --> aSb
    EOG1
  else
    File.read(ARGV.first)
  end

window.load_grammar grammar

app.create
app.run
+
+require 'erb'
+require 'ftools'
+require 'yaml'
+require 'redcloth'
+
module WhyTheLuckyStiff
  # The whole book as loaded from YAML.
  class Book
    attr_accessor :author, :title, :terms, :image, :teaser,
      :chapters, :expansion_paks, :encoding, :credits

    # Looks +x+ up in @lang. A missing key is reported with warn and the
    # warning text itself is returned in place of the translation.
    def [] x
      @lang.fetch(x) do
        warning = "[not translated: '#{x}'!]"
        warn warning
        warning
      end
    end
  end

  # Loads a YAML document from +file_name+ and returns the result.
  #
  # The file is opened with a block so that the handle is closed again
  # (the previous File.open without a block left it open).
  # NOTE(review): the domain types registered in this module turn tagged
  # YAML nodes into objects, so only trusted files should be loaded.
  def Book::load( file_name )
    File.open( file_name ) { |file| YAML::load( file ) }
  end

  # One numbered section; the content is wrapped in a RedCloth object.
  class Section
    attr_accessor :index, :header, :content
    def initialize( i, h, c )
      @index, @header, @content = i, h, RedCloth::new( c.to_s )
    end
  end

  class Sidebar
    attr_accessor :title, :content
  end

  YAML::add_domain_type( 'whytheluckystiff.net,2003', 'sidebar' ) do |taguri, val|
    YAML::object_maker( Sidebar, 'title' => val.keys.first, 'content' => RedCloth::new( val.values.first ) )
  end

  # A chapter. Entries of +sects+ that respond to :keys become numbered
  # Section objects; all other entries are kept as they are.
  class Chapter
    attr_accessor :index, :title, :sections
    def initialize( i, t, sects )
      @index = i
      @title = t
      i = 0
      @sections = sects.collect do |s|
        if s.respond_to?( :keys )
          i += 1
          Section.new( i, s.keys.first, s.values.first )
        else
          s
        end
      end
    end
  end

  YAML::add_domain_type( 'whytheluckystiff.net,2003', 'book' ) do |taguri, val|
    ['chapters', 'expansion_paks'].each do |chaptype|
      i = 0
      val[chaptype].collect! do |c|
        i += 1
        Chapter::new( i, c.keys.first, c.values.first )
      end
    end
    val['teaser'].collect! do |t|
      Section::new( 1, t.keys.first, t.values.first )
    end
    val['terms'] = RedCloth::new( val['terms'] )
    YAML::object_maker( Book, val )
  end

  class Image
    attr_accessor :file_name
  end

  YAML::add_domain_type( 'whytheluckystiff.net,2003', 'img' ) do |taguri, val|
    YAML::object_maker( Image, 'file_name' => "i/" + val )
  end
end
+
+#
+# Convert the book to HTML
+#
if __FILE__ == $0
  unless ARGV[0]
    puts "Usage: #{$0} [/path/to/save/html]"
    exit
  end

  site_path = ARGV[0]
  book = WhyTheLuckyStiff::Book::load( 'poignant.yml' )
  chapter = nil

  # The templates are read with File.read, which closes the file again;
  # ERB::new( File.open( ... ).read ) left every template file open.

  # Write index page
  index_tpl = ERB::new( File.read( 'index.erb' ) )
  File.open( File.join( site_path, 'index.html' ), 'w' ) do |out|
    out << index_tpl.result
  end

  book.chapters = book.chapters[0,3] if ARGV.include? '-fast'

  # Write chapter pages
  chapter_tpl = ERB::new( File.read( 'chapter.erb' ) )
  book.chapters.each do |chapter|
    File.open( File.join( site_path, "chapter-#{ chapter.index }.html" ), 'w' ) do |out|
      # Pass the block's binding, as the expansion pak loop below does, so
      # the template is evaluated where the current chapter is visible
      # instead of depending on the block parameter overwriting the
      # outer `chapter` variable.
      out << chapter_tpl.result( binding )
    end
  end
  exit if ARGV.include? '-fast'

  # Write expansion pak pages
  expak_tpl = ERB::new( File.read( 'expansion-pak.erb' ) )
  book.expansion_paks.each do |pak|
    File.open( File.join( site_path, "expansion-pak-#{ pak.index }.html" ), 'w' ) do |out|
      out << expak_tpl.result( binding )
    end
  end

  # Write printable version
  print_tpl = ERB::new( File.read( 'print.erb' ) )
  File.open( File.join( site_path, "print.html" ), 'w' ) do |out|
    out << print_tpl.result
  end

  # Copy css + images into site
  copy_list = ["guide.css"] +
    Dir["i/*"].find_all { |image| image =~ /\.(gif|jpg|png)$/ }

  File.makedirs( File.join( site_path, "i" ) )
  copy_list.each do |copy_file|
    File.copy( copy_file, File.join( site_path, copy_file ) )
  end
end
+
+#!/usr/bin/env ruby
+
+require 'fox'
+begin
+ require 'opengl'
+rescue LoadError
+ require 'fox/missingdep'
+ MSG = <<EOM
+ Sorry, this example depends on the OpenGL extension. Please
+ check the Ruby Application Archives for an appropriate
+ download site.
+EOM
+ missingDependency(MSG)
+end
+
+
+include Fox
+include Math
+
+Deg2Rad = Math::PI / 180
+
+D_MAX = 6
+SQUARE_SIZE = 2.0 / D_MAX
+SQUARE_DISTANCE = 4.0 / D_MAX
+AMPLITUDE = SQUARE_SIZE
+LAMBDA = D_MAX.to_f / 2
+
+class GLTestWindow < FXMainWindow
+
+ # How often our timer will fire (in milliseconds)
+ TIMER_INTERVAL = 500
+
+ # Rotate the boxes when a timer message is received
def onTimeout(sender, sel, ptr)
  # Advance the angle, redraw, and register this handler for the next
  # timeout.
  @angle = @angle + 10.0
# @size = 0.5 + 0.2 * Math.cos(Deg2Rad * @angle)
  drawScene
  application = getApp()
  @timer = application.addTimeout(TIMER_INTERVAL, method(:onTimeout))
end
+
+ # Rotate the boxes when a chore message is received
def onChore(sender, sel, ptr)
  # Advance the angle, redraw, and register this handler as the next
  # chore.
  @angle = @angle + 10.0
# @angle %= 360.0
# @size = 0.5 + 0.2 * Math.cos(Deg2Rad * @angle)
  drawScene
  application = getApp()
  @chore = application.addChore(method(:onChore))
end
+
+ # Draw the GL scene
def drawScene
  # Draws one frame into @glcanvas: sets up viewport, projection, camera
  # and light, then draws three squares for every (x, y) cell of the
  # grid -D_MAX..D_MAX - a red one at height AMPLITUDE * |x + y - 2|, a
  # blue one at height AMPLITUDE * |y - x + 1| and one at height 0 whose
  # colour depends on x and y.
  lightPosition = [15.0, 10.0, 5.0, 1.0]
  lightAmbient  = [ 0.1,  0.1, 0.1, 1.0]
  lightDiffuse  = [ 0.9,  0.9, 0.9, 1.0]

  width = @glcanvas.width.to_f
  height = @glcanvas.height.to_f
  aspect = width/height

  # Make context current
  @glcanvas.makeCurrent()

  GL.Viewport(0, 0, @glcanvas.width, @glcanvas.height)

  GL.ClearColor(1.0/256, 0.0, 5.0/256, 1.0)
  GL.Clear(GL::COLOR_BUFFER_BIT|GL::DEPTH_BUFFER_BIT)
  GL.Enable(GL::DEPTH_TEST)

  GL.Disable(GL::DITHER)

  GL.MatrixMode(GL::PROJECTION)
  GL.LoadIdentity()
  GLU.Perspective(30.0, aspect, 1.0, 100.0)

  GL.MatrixMode(GL::MODELVIEW)
  GL.LoadIdentity()
  GLU.LookAt(5.0, 10.0, 15.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0)

  GL.ShadeModel(GL::SMOOTH)
  GL.Light(GL::LIGHT0, GL::POSITION, lightPosition)
  GL.Light(GL::LIGHT0, GL::AMBIENT, lightAmbient)
  GL.Light(GL::LIGHT0, GL::DIFFUSE, lightDiffuse)
  GL.Enable(GL::LIGHT0)
  GL.Enable(GL::LIGHTING)

  GL.Rotated(0.1*@angle, 0.0, 1.0, 0.0)
  (-D_MAX..D_MAX).each do |x|
    (-D_MAX..D_MAX).each do |y|
      h1 = (x + y - 2).abs
      h2 = (y - x + 1).abs
      drawSceneSquare([1, 0, 0, 1], AMPLITUDE * h1, x, y)
      drawSceneSquare([0, 0, 1, 1], AMPLITUDE * h2, x, y)
      drawSceneSquare([0.0 + (x/10.0), 0.0 + (y/10.0), 0, 1], 0, x, y)
    end
  end

  # Swap if it is double-buffered
  if @glvisual.isDoubleBuffer
    @glcanvas.swapBuffers
  end

  # Make context non-current
  @glcanvas.makeNonCurrent
end

# Draws one square in the given material colour for grid cell (x, y),
# translated to +height+ on the y axis.
def drawSceneSquare(color, height, x, y)
  GL.PushMatrix
  GL.Material(GL::FRONT, GL::AMBIENT, color)
  GL.Material(GL::FRONT, GL::DIFFUSE, color)

  GL.Translated(
    y * SQUARE_DISTANCE,
    height,
    x * SQUARE_DISTANCE
  )

  GL.Begin(GL::TRIANGLE_STRIP)
  GL.Normal(1.0, 0.0, 0.0)
  GL.Vertex(-SQUARE_SIZE, +SQUARE_SIZE, -SQUARE_SIZE)
  GL.Vertex(-SQUARE_SIZE, +SQUARE_SIZE, +SQUARE_SIZE)
  GL.Vertex(+SQUARE_SIZE, +SQUARE_SIZE, -SQUARE_SIZE)
  GL.Vertex(+SQUARE_SIZE, +SQUARE_SIZE, +SQUARE_SIZE)
  GL.End

  GL.PopMatrix
end
private :drawSceneSquare
+
+ def initialize(app)
+ # Invoke the base class initializer
+ super(app, "OpenGL Test Application", nil, nil, DECOR_ALL, 0, 0, 1024, 768)
+
+ # Construct the main window elements
+ frame = FXHorizontalFrame.new(self, LAYOUT_SIDE_TOP|LAYOUT_FILL_X|LAYOUT_FILL_Y)
+ frame.padLeft, frame.padRight = 0, 0
+ frame.padTop, frame.padBottom = 0, 0
+
+ # Left pane to contain the glcanvas
+ glcanvasFrame = FXVerticalFrame.new(frame,
+ LAYOUT_FILL_X|LAYOUT_FILL_Y|LAYOUT_TOP|LAYOUT_LEFT)
+ glcanvasFrame.padLeft, glcanvasFrame.padRight = 10, 10
+ glcanvasFrame.padTop, glcanvasFrame.padBottom = 10, 10
+
+ # Label above the glcanvas
+ FXLabel.new(glcanvasFrame, "OpenGL Canvas Frame", nil,
+ JUSTIFY_CENTER_X|LAYOUT_FILL_X)
+
+ # Horizontal divider line
+ FXHorizontalSeparator.new(glcanvasFrame, SEPARATOR_GROOVE|LAYOUT_FILL_X)
+
+ # Drawing glcanvas
+ glpanel = FXVerticalFrame.new(glcanvasFrame, (FRAME_SUNKEN|FRAME_THICK|
+ LAYOUT_FILL_X|LAYOUT_FILL_Y|LAYOUT_TOP|LAYOUT_LEFT))
+ glpanel.padLeft, glpanel.padRight = 0, 0
+ glpanel.padTop, glpanel.padBottom = 0, 0
+
+ # A visual to draw OpenGL
+ @glvisual = FXGLVisual.new(getApp(), VISUAL_DOUBLEBUFFER)
+
+ # Drawing glcanvas
+ @glcanvas = FXGLCanvas.new(glpanel, @glvisual, nil, 0,
+ LAYOUT_FILL_X|LAYOUT_FILL_Y|LAYOUT_TOP|LAYOUT_LEFT)
+ @glcanvas.connect(SEL_PAINT) {
+ drawScene
+ }
+ @glcanvas.connect(SEL_CONFIGURE) {
+ if @glcanvas.makeCurrent
+ GL.Viewport(0, 0, @glcanvas.width, @glcanvas.height)
+ @glcanvas.makeNonCurrent
+ end
+ }
+
+ # Right pane for the buttons
+ buttonFrame = FXVerticalFrame.new(frame, LAYOUT_FILL_Y|LAYOUT_TOP|LAYOUT_LEFT)
+ buttonFrame.padLeft, buttonFrame.padRight = 10, 10
+ buttonFrame.padTop, buttonFrame.padBottom = 10, 10
+
+ # Label above the buttons
+ FXLabel.new(buttonFrame, "Button Frame", nil,
+ JUSTIFY_CENTER_X|LAYOUT_FILL_X)
+
+ # Horizontal divider line
+ FXHorizontalSeparator.new(buttonFrame, SEPARATOR_RIDGE|LAYOUT_FILL_X)
+
+ # Spin according to timer
+ spinTimerBtn = FXButton.new(buttonFrame,
+ "Spin &Timer\tSpin using interval timers\nNote the app
+ blocks until the interal has elapsed...", nil,
+ nil, 0, FRAME_THICK|FRAME_RAISED|LAYOUT_FILL_X|LAYOUT_TOP|LAYOUT_LEFT)
+ spinTimerBtn.padLeft, spinTimerBtn.padRight = 10, 10
+ spinTimerBtn.padTop, spinTimerBtn.padBottom = 5, 5
+ spinTimerBtn.connect(SEL_COMMAND) {
+ @spinning = true
+ @timer = getApp().addTimeout(TIMER_INTERVAL, method(:onTimeout))
+ }
+ spinTimerBtn.connect(SEL_UPDATE) { |sender, sel, ptr|
+ @spinning ? sender.disable : sender.enable
+ }
+
+ # Spin according to chore
+ spinChoreBtn = FXButton.new(buttonFrame,
+ "Spin &Chore\tSpin as fast as possible using chores\nNote even though the
+ app is very responsive, it never blocks;\nthere is always something to
+ do...", nil,
+ nil, 0, FRAME_THICK|FRAME_RAISED|LAYOUT_FILL_X|LAYOUT_TOP|LAYOUT_LEFT)
+ spinChoreBtn.padLeft, spinChoreBtn.padRight = 10, 10
+ spinChoreBtn.padTop, spinChoreBtn.padBottom = 5, 5
+ spinChoreBtn.connect(SEL_COMMAND) {
+ @spinning = true
+ @chore = getApp().addChore(method(:onChore))
+ }
+ spinChoreBtn.connect(SEL_UPDATE) { |sender, sel, ptr|
+ @spinning ? sender.disable : sender.enable
+ }
+
+ # Stop spinning
+ stopBtn = FXButton.new(buttonFrame,
+ "&Stop Spin\tStop this mad spinning, I'm getting dizzy", nil,
+ nil, 0, FRAME_THICK|FRAME_RAISED|LAYOUT_FILL_X|LAYOUT_TOP|LAYOUT_LEFT)
+ stopBtn.padLeft, stopBtn.padRight = 10, 10
+ stopBtn.padTop, stopBtn.padBottom = 5, 5
+ stopBtn.connect(SEL_COMMAND) {
+ @spinning = false
+ if @timer
+ getApp().removeTimeout(@timer)
+ @timer = nil
+ end
+ if @chore
+ getApp().removeChore(@chore)
+ @chore = nil
+ end
+ }
+ stopBtn.connect(SEL_UPDATE) { |sender, sel, ptr|
+ @spinning ? sender.enable : sender.disable
+ }
+
+ # Exit button
+ exitBtn = FXButton.new(buttonFrame, "&Exit\tExit the application", nil,
+ getApp(), FXApp::ID_QUIT,
+ FRAME_THICK|FRAME_RAISED|LAYOUT_FILL_X|LAYOUT_TOP|LAYOUT_LEFT)
+ exitBtn.padLeft, exitBtn.padRight = 10, 10
+ exitBtn.padTop, exitBtn.padBottom = 5, 5
+
+ # Make a tooltip
+ FXTooltip.new(getApp())
+
+ # Initialize private variables
+ @spinning = false
+ @chore = nil
+ @timer = nil
+ @angle = 0.0
+ @size = 0.5
+ end
+
+ # Create and initialize
+ def create
+ super
+ show(PLACEMENT_SCREEN)
+ end
+end
+
if $PROGRAM_NAME == __FILE__
  # Construct the application
  application = FXApp.new("GLTest", "FoxTest")

  # The chore-based spin runs fastest when FXRuby's event loop does not
  # also try to schedule other Ruby threads. Disabling that would be fine
  # here because this program starts no other threads, but the call is
  # left switched off.
  #application.disableThreads

  # Construct the main window; the instance itself is not needed afterwards
  GLTestWindow.new(application)

  # Create the app's windows, then enter the event loop
  application.create
  application.run
end
+
# One sticker of a cube side; it only carries a colour.
class Facelet
  attr_reader :color
  attr_writer :color

  # color:: the initial colour value (returned unchanged by #to_s).
  def initialize(color)
    self.color = color
  end

  # The colour itself serves as the textual form.
  def to_s
    color
  end
end
+
# A group of facelets together with a snapshot of their colours taken at
# construction time.
class Edge
  attr_accessor :facelets, :colors

  # facelets:: objects responding to #color and #color=.
  def initialize(facelets)
    @facelets = facelets
    @colors = facelets.map { |facelet| facelet.color }
  end

  # Paint this edge's facelets with the colours snapshotted by +edge+,
  # position by position.
  def apply(edge)
    @facelets.each_index do |position|
      @facelets[position].color = edge.colors[position]
    end
  end

  # Two facelets on the first row, three on the second.
  def inspect
    format("\n%s %s\n%s %s %s", *facelets)
  end
end
+
# One side of the cube: nine facelets plus references to the four
# neighbouring sides.
#
# Facelet positions (digits) and neighbour indexes (around the square):
#
#         0
#       0 1 2
#     3 3 4 5 1
#       6 7 8
#         2
class Side
  attr_reader :num, :facelets
  attr_accessor :sides

  # num:: the side's number; it is what #<=> compares.
  def initialize(num)
    @num = num
    @sides = []
    @facelets = []
    @fl_by_side = {}
  end

  # Wrap each given colour in a Facelet and register, for every border
  # facelet, which neighbouring side(s) it touches. The centre facelet (4)
  # touches no neighbour and is not registered. #sides must be set first.
  def facelets=(facelets)
    @facelets = facelets.map { |c| Facelet.new(c) }
    init_facelet 0, 3, 0
    init_facelet 1, 0
    init_facelet 2, 0, 1
    init_facelet 3, 3
    init_facelet 5, 1
    init_facelet 6, 2, 3
    init_facelet 7, 2
    init_facelet 8, 1, 2
  end

  # Sides are ordered by their number.
  def <=>(side)
    self.num <=> side.num
  end

  # Remember that the facelet at +pos+ borders the neighbours whose indexes
  # (into #sides) are given. The key is the sorted array of Side objects.
  def init_facelet(pos, *side_nums)
    key = side_nums.map { |index| @sides[index] }.sort
    @fl_by_side[key] = pos
  end

  # Set the colour of the facelet bordering the given sides.
  # NOTE(review): the parameters are declared as (color, *sides), whereas
  # Ruby passes the assigned value last for `side[a, b] = value`; confirm
  # how callers invoke this before relying on the index-assignment syntax.
  def []=(color, *sides)
    @facelets[@fl_by_side[sides.sort]].color = color
  end

  # For each given array of neighbouring sides, return the facelet that
  # borders exactly those sides.
  def values_at(*sides)
    sides.map { |key| @facelets[@fl_by_side[key.sort]] }
  end

  # With a range: the facelets at those positions joined by spaces.
  # Without: a picture of the side with the neighbours' numbers around it.
  # (The picture's indentation is reconstructed; only its token order is
  # known for certain.)
  def inspect(range = nil)
    return @facelets.values_at(*(range.to_a)).join(' ') if range

    picture = <<-EOS
        A
      0 1 2
    D 3 4 5 B
      6 7 8
        C
    EOS
    # 'ABCD'.index maps the letter to 0..3 on every Ruby version; the former
    # `side[0]-?A` only worked while characters were integers.
    picture.gsub(/\d/) { |pos| @facelets[pos.to_i].to_s }.
            gsub(/[ABCD]/) { |letter| @sides['ABCD'.index(letter)].num.to_s }
  end

  # Build the Edge shared with neighbour number +side+: two facelets of this
  # side followed by the three facelets of the neighbour along the border.
  def get_edge(side)
    trio = (-1..1).map { |x| (side + x) % 4 }
    prev_side, this_side, next_side = @sides.values_at(*trio)
    e = Edge.new(
      self     .values_at( [this_side], [this_side, next_side] ) +
      this_side.values_at( [self, prev_side], [self ], [self, next_side] )
    )
    #puts 'Edge created for side %d: ' % side + e.inspect
    e
  end

  # Rotate the four edges around this side by +dir+ steps. All edges are
  # built first so that every edge applies colours snapshotted before the
  # turn.
  def turn(dir)
    #p 'turn side %d in %d' % [num, dir]
    edges = (0..3).map { |n| get_edge n }
    edges.each_with_index do |edge, i|
      edge.apply edges[(i - dir) % 4]
    end
  end
end
+
# The whole cube: six Side objects (numbered 0..5 in the order left, front,
# right, back, top, bottom) wired to their four neighbours each.
class Cube
  def initialize
    @sides = []
    %w(left front right back top bottom).each_with_index { |name, i|
      side = Side.new(i)
      @sides[i] = side
      # Same effect as the former eval of "@sides[i] = @name = Side.new(i)",
      # without evaluating generated source text.
      instance_variable_set("@#{name}", side)
    }
    @left.sides   = [@top, @front, @bottom, @back]
    @front.sides  = [@top, @right, @bottom, @left]
    @right.sides  = [@top, @back, @bottom, @front]
    @back.sides   = [@top, @left, @bottom, @right]
    @top.sides    = [@back, @right, @front, @left]
    @bottom.sides = [@front, @right, @back, @left]
  end

  # Parse an unfolded-cube text (three rows for the top, three rows holding
  # left/front/right/back, three rows for the bottom) and assign the colours.
  # The template below is turned into a regexp: every word character becomes
  # \w and every whitespace run becomes \s*, so spacing in +fs+ is free.
  # If +fs+ does not match, every side receives an empty facelet list.
  def read_facelets(fs)
    pattern = Regexp.new(<<-EOP.gsub(/\w/, '\w').gsub(/\s+/, '\s*'))
              (w w w)
              (w w w)
              (w w w)
      (r r r) (g g g) (b b b) (o o o)
      (r r r) (g g g) (b b b) (o o o)
      (r r r) (g g g) (b b b) (o o o)
              (y y y)
              (y y y)
              (y y y)
    EOP
    md = pattern.match(fs).to_a

    @top.facelets    = parse_facelets(md.values_at(1,2,3))
    @left.facelets   = parse_facelets(md.values_at(4,8,12))
    @front.facelets  = parse_facelets(md.values_at(5,9,13))
    @right.facelets  = parse_facelets(md.values_at(6,10,14))
    @back.facelets   = parse_facelets(md.values_at(7,11,15))
    @bottom.facelets = parse_facelets(md.values_at(16,17,18))
  end

  # Turn side number +side+ by +dir+ steps.
  def turn(side, dir)
    #p 'turn %d in %d' % [side, dir]
    @sides[side].turn(dir)
    #puts inspect
  end

  # Render the unfolded cube. Each "s:a-b" token stands for facelets a..b of
  # side s. (The six-space indent of the top and bottom faces is
  # reconstructed so that they line up with the second face of the middle
  # rows.)
  def inspect
    <<-EOF.gsub(/(\d):(\d)-(\d)/) { @sides[$1.to_i].inspect(Range.new($2.to_i, $3.to_i)) }
      4:0-2
      4:3-5
      4:6-8
0:0-2 1:0-2 2:0-2 3:0-2
0:3-5 1:3-5 2:3-5 3:3-5
0:6-8 1:6-8 2:6-8 3:6-8
      5:0-2
      5:3-5
      5:6-8
    EOF
  end

private
  # Join the captured rows, drop the blanks and split into single characters.
  def parse_facelets(rows)
    rows.join.delete(' ').split(//)
  end
end
+
#$stdin = DATA

# Input: a scenario count, then per scenario nine lines of facelets, a turn
# count and one "side dir" line per turn. The cube is printed after the
# turns of each scenario, followed by an empty line.
scenario_count = gets.to_i
scenario_count.times do |index|
  puts "Scenario ##{index + 1}:"
  net = ''
  9.times { net << gets }
  cube = Cube.new
  cube.read_facelets(net)
  turn_count = gets.to_i
  turn_count.times do
    side, dir = gets.split.map { |field| field.to_i }
    cube.turn(side, dir)
  end
  puts cube.inspect
  puts
end
+
+# 2004 by murphy <korny@cYcnus.de>
+# GPL
# One scheduling scenario: reads every team member's appointments and prints
# the time windows in which a meeting is possible.
class Scenario
  # A point in time held as [year, month, day, hour, minute, second];
  # missing trailing components count as 0.
  class TimePoint
    attr_reader :data

    def initialize(*data)
      @data = data
    end

    # Component +i+, or 0 when it was not given.
    def [](i)
      @data[i] || 0
    end

    include Comparable

    # Compare component by component, most significant first.
    def <=>(tp)
      [@data.size, tp.data.size].max.times do |i|
        r = self[i] <=> tp[i]
        return r if r.nonzero?
      end
      0
    end

    # Component-wise difference as an array. The components are not
    # normalised (they can be negative), so the result is not a duration;
    # use #to_seconds to measure elapsed time.
    def -(tp)
      r = []
      [@data.size, tp.data.size].max.times do |i|
        r << self[i] - tp[i]
      end
      r
    end

    # Seconds on a continuous scale, counting days with the proleptic
    # Gregorian calendar. Only differences between two values are meaningful.
    def to_seconds
      year, month, day, hour, minute, second = (0..5).map { |i| self[i] }
      # Shift the year start to March so that the leap day comes last.
      year -= 1 if month <= 2
      era = year / 400                       # Integer#/ floors
      year_of_era = year - era * 400
      shifted_month = month > 2 ? month - 3 : month + 9
      day_of_year = (153 * shifted_month + 2) / 5 + day - 1
      day_of_era = year_of_era * 365 + year_of_era / 4 - year_of_era / 100 + day_of_year
      days = era * 146_097 + day_of_era
      ((days * 24 + hour) * 60 + minute) * 60 + second
    end

    def inspect
      # 01/01/1800 00:00:00
      '%02d/%02d/%04d %02d:%02d:%02d' % @data.values_at(1, 2, 0, 3, 4, 5)
    end
  end

  ONE_HOUR = TimePoint.new 0, 0, 0, 1, 0, 0
  SECONDS_PER_HOUR = 3600

  APPOINTMENT_PATTERN = /
    ( \d{4} ) \s ( \d{2} ) \s ( \d{2} ) \s ( \d{2} ) \s ( \d{2} ) \s ( \d{2} ) \s
    ( \d{4} ) \s ( \d{2} ) \s ( \d{2} ) \s ( \d{2} ) \s ( \d{2} ) \s ( \d{2} )
  /x

  # Read one scenario from +io+: the team size, then for each member an
  # appointment count followed by that many "start end description" lines.
  # Every appointment becomes two events: -1 member at its start, +1 at its
  # end. The whole team arrives in 1800 and leaves in 2200.
  def initialize(io)
    @team_size = io.gets.to_i
    @data = [ [TimePoint.new(1800, 01, 01, 00, 00, 00), @team_size] ]
    @team_size.times do  # each team member
      io.gets.to_i.times do  # each appointment
        m = APPOINTMENT_PATTERN.match io.gets
        @data << [TimePoint.new(*m.captures[0,6].map { |x| x.to_i }), -1]
        @data << [TimePoint.new(*m.captures[6,6].map { |x| x.to_i }), +1]
      end
    end
    @data << [TimePoint.new(2200, 01, 01, 00, 00, 00), -@team_size]
  end

  # Print every window of at least one hour during which at least two
  # members are available and at most one is absent, or a notice when there
  # is none.
  def print_time_plan
    n = 0
    appointment = nil
    no_appointment = true
    # Among events at the same instant, arrivals (+) are processed before
    # departures (-), so a hand-over between two members does not split a
    # window. sort_by alone leaves the order of equal keys undefined.
    events = @data.sort_by { |tp, action| [tp, -action] }
    events.each do |tp, action|
      n += action
      # at any time during the meeting, at least two team members need to be there
      # and at most one team member is allowed to be absent
      if n >= 2 && (@team_size - n) <= 1
        appointment ||= tp
      elsif appointment
        # the meeting should be at least one hour in length; measured in
        # elapsed seconds so that day/month boundaries are handled
        if tp.to_seconds - appointment.to_seconds >= SECONDS_PER_HOUR
          puts 'appointment possible from %p to %p' % [appointment, tp]
          no_appointment = false
        end
        appointment = false
      end
    end
    puts 'no appointment possible' if no_appointment
  end
end
+
# The first line of DATA holds the number of scenarios; each scenario is
# then read from DATA and its time plan printed, followed by an empty line.
scenario_total = DATA.gets.to_i
scenario_total.times do |index|
  puts format('Scenario #%d:', index + 1)
  scenario = Scenario.new(DATA)
  scenario.print_time_plan
  puts
end
+
+#__END__
+2
+3
+3
+2002 06 28 15 00 00 2002 06 28 18 00 00 TUD Contest Practice Session
+2002 06 29 10 00 00 2002 06 29 15 00 00 TUD Contest
+2002 11 15 15 00 00 2002 11 17 23 00 00 NWERC Delft
+4
+2002 06 25 13 30 00 2002 06 25 15 30 00 FIFA World Cup Semifinal I
+2002 06 26 13 30 00 2002 06 26 15 30 00 FIFA World Cup Semifinal II
+2002 06 29 13 00 00 2002 06 29 15 00 00 FIFA World Cup Third Place
+2002 06 30 13 00 00 2002 06 30 15 00 00 FIFA World Cup Final
+1
+2002 06 01 00 00 00 2002 06 29 18 00 00 Preparation of Problem Set
+2
+1
+1800 01 01 00 00 00 2200 01 01 00 00 00 Solving Problem 8
+0
+
+require 'token_consts'
+require 'symbol'
+require 'ctype'
+require 'error'
+
# Defined on Integer (the superclass of Fixnum where Fixnum exists), so it
# keeps working for small integers on every Ruby version.
class Integer
  # Treat the receiver as a character code and return the value of the digit
  # it encodes. Returns nonsense for codes that are not digits.
  # Examples:
  # <code>
  #   54.digit == 6     # 54 is the code of "6"
  # </code>
  #
  # <code>
  #   65.digit == 17    # 65 is the code of "A"
  # </code>
  def digit
    # 48 is the character code of "0". The literal ?0 is only an integer on
    # Ruby 1.8; later versions make it a String, which cannot be subtracted.
    self - 48
  end
end
+
##
# A simple scanner for the lexical analysis of the language Pas-0.
#
# Characters are handled as returned by the input's #getc and are compared
# against ?x character literals throughout.
#
# @author Andreas Kunert
# Ruby port by murphy
class Scanner

  include TokenConsts

  attr_reader :line, :pos

  # To allow Scanner.new without parameters.
  DUMMY_INPUT = 'dummy file'
  def DUMMY_INPUT.getc
    nil
  end

  ##
  # Creates a scanner that uses the given IO as its input.
  # The first character is read ahead immediately.
  def initialize input = DUMMY_INPUT
    @line = 1
    @pos = 0

    begin
      @input = input
      @next_char = @input.getc
    rescue IOError  # TODO show the reason!
      Error.ioError
      raise
    end
  end

  ##
  # Reads the next character from the input: the look-ahead character
  # becomes the current one and a new look-ahead is fetched. An IOError is
  # reported through Error.ioError and then re-raised.
  def read_next_char
    begin
      @pos += 1
      @current_char = @next_char
      @next_char = @input.getc
    rescue IOError
      Error.ioError
      raise
    end
  end

  ##
  # Searches for the next symbol, identifies it, instantiates a matching
  # PascalSymbol object and returns it. Blanks, line breaks and comments are
  # skipped.
  # @see Symbol
  # @return the symbol found, as a PascalSymbol object
  def get_symbol
    current_symbol = nil
    until current_symbol
      read_next_char

      if @current_char.alpha?
        identifier = @current_char.chr
        while @next_char.alpha? or @next_char.digit?
          identifier << @next_char
          read_next_char
        end
        current_symbol = handle_identifier(identifier.upcase)
      elsif @current_char.digit?
        current_symbol = number
      else
        case @current_char
        when ?\s
          # ignore
        when ?\n
          new_line
        when nil
          current_symbol = PascalSymbol.new EOP
        when ?{
          comment

        when ?:
          if @next_char == ?=
            read_next_char
            current_symbol = PascalSymbol.new BECOMES
          else
            current_symbol = PascalSymbol.new COLON
          end

        when ?<
          if (@next_char == ?=)
            read_next_char
            current_symbol = PascalSymbol.new LEQSY
          elsif (@next_char == ?>)
            read_next_char
            current_symbol = PascalSymbol.new NEQSY
          else
            current_symbol = PascalSymbol.new LSSSY
          end

        when ?>
          if (@next_char == ?=)
            read_next_char
            current_symbol = PascalSymbol.new GEQSY
          else
            current_symbol = PascalSymbol.new GRTSY
          end

        when ?. then current_symbol = PascalSymbol.new PERIOD
        when ?( then current_symbol = PascalSymbol.new LPARENT
        when ?, then current_symbol = PascalSymbol.new COMMA
        when ?* then current_symbol = PascalSymbol.new TIMES
        when ?/ then current_symbol = PascalSymbol.new SLASH
        when ?+ then current_symbol = PascalSymbol.new PLUS
        when ?- then current_symbol = PascalSymbol.new MINUS
        when ?= then current_symbol = PascalSymbol.new EQLSY
        when ?) then current_symbol = PascalSymbol.new RPARENT
        when ?; then current_symbol = PascalSymbol.new SEMICOLON
        else
          # Anything else above the blank is an illegal character;
          # characters below it are silently skipped.
          Error.error(100, @line, @pos) if @current_char > ?\s
        end
      end
    end
    current_symbol
  end

private
  ##
  # Tries to recognise a keyword in the given string.
  # If a keyword is found, returns a PascalSymbol object representing that
  # keyword. Otherwise returns a SymbolIdent object (derived from
  # PascalSymbol) that holds the string unchanged.
  # @see symbol
  # @return the keyword's PascalSymbol if one was found, else a SymbolIdent
  def handle_identifier identifier
    if sym = KEYWORD_SYMBOLS[identifier]
      PascalSymbol.new sym
    else
      SymbolIdent.new identifier
    end
  end

  MAXINT = 2**31 - 1
  MAXINT_DIV_10 = MAXINT / 10
  MAXINT_MOD_10 = MAXINT % 10
  ##
  # Tries to assemble a number from the current character and the ones that
  # follow. It uses the fairly intuitive algorithm of multiplying the value
  # so far by 10 for every further digit and adding that digit. Special
  # cases then only remain for real numbers.
  # <BR>
  # If neither a period nor an E occurs, a SymbolIntcon object is returned,
  # otherwise (real number) a SymbolRealcon object. Both hold the numeric
  # value.
  # <BR>
  # Note: this would be much easier to implement with the Java/Ruby API.
  # It was written this way anyway to demonstrate the algorithm.
  # @see symbol
  # @return SymbolIntcon or SymbolRealcon object holding the numeric value
  def number
    is_integer = true
    integer_too_long = false
    exponent = 0
    exp_counter = -1
    exp_sign = 1

    integer_mantisse = @current_char.digit

    # Accumulate digits while the value still fits into MAXINT. The digit
    # test guards both alternatives; without it the second alternative could
    # swallow a non-digit (or call #digit at the end of input).
    while @next_char.digit? and
        (integer_mantisse < MAXINT_DIV_10 or
         (integer_mantisse == MAXINT_DIV_10 and @next_char.digit <= MAXINT_MOD_10))
      integer_mantisse *= 10
      integer_mantisse += @next_char.digit
      read_next_char
    end

    real_mantisse = integer_mantisse

    # Remaining digits no longer fit into an integer constant.
    while @next_char.digit?
      integer_too_long = true
      real_mantisse *= 10
      real_mantisse += @next_char.digit
      read_next_char
    end
    if @next_char == ?.
      read_next_char
      is_integer = false
      unless @next_char.digit?
        Error.error 101, @line, @pos
      end
      while @next_char.digit?
        real_mantisse += @next_char.digit * (10 ** exp_counter)
        read_next_char
        exp_counter -= 1
      end
    end
    if @next_char == ?E
      is_integer = false
      read_next_char
      if @next_char == ?-
        exp_sign = -1
        read_next_char
      end
      unless @next_char.digit?
        Error.error 101, @line, @pos
      end
      while @next_char.digit?
        exponent *= 10
        exponent += @next_char.digit
        read_next_char
      end
    end

    if is_integer
      if integer_too_long
        Error.error 102, @line, @pos
      end
      SymbolIntcon.new integer_mantisse
    else
      SymbolRealcon.new real_mantisse * (10 ** (exp_sign * exponent))
    end
  end

  ##
  # Skips a comment.
  # All characters up to a closing brace are simply read and discarded.
  def comment
    while @current_char != ?}
      forbid_eop
      new_line if @current_char == ?\n
      read_next_char
    end
  end

  # Start counting positions on a new line.
  def new_line
    @line += 1
    @pos = 0
  end

  # Report error 103 and terminate when the input has ended. The exit is
  # part of the error branch; it used to follow the branch and therefore
  # ended the program at the first character of every comment.
  def forbid_eop
    if eop?
      Error.error 103, @line, @pos
      exit
    end
  end

  # True once the current character is nil, i.e. the input is exhausted.
  def eop?
    @current_char.nil?
  end
end
+
##
# Runs a test program.
# It creates a Scanner for the file named by the first argument (default
# 'test.pas') and keeps calling get_symbol on it, printing every symbol,
# until the end-of-program symbol has been printed.
if __FILE__ == $PROGRAM_NAME
  source = File.new(ARGV[0] || 'test.pas')
  scanner = Scanner.new(source)
  loop do
    symbol = scanner.get_symbol
    puts symbol
    break if symbol.typ == TokenConsts::EOP
  end
end
+# -*- ruby -*-
+
+# Local variables:
+# indent-tabs-mode: nil
+# ruby-indent-level: 4
+# End:
+
+# @@PLEAC@@_NAME
+# @@SKIP@@ Ruby
+
+# @@PLEAC@@_WEB
+# @@SKIP@@ http://www.ruby-lang.org
+
+
+# @@PLEAC@@_1.0
+string = '\n' # two characters, \ and an n
+string = 'Jon \'Maddog\' Orwant' # literal single quotes
+
+string = "\n" # a "newline" character
+string = "Jon \"Maddog\" Orwant" # literal double quotes
+
+string = %q/Jon 'Maddog' Orwant/ # literal single quotes
+
+string = %q[Jon 'Maddog' Orwant] # literal single quotes
+string = %q{Jon 'Maddog' Orwant} # literal single quotes
+string = %q(Jon 'Maddog' Orwant) # literal single quotes
+string = %q<Jon 'Maddog' Orwant> # literal single quotes
+
+a = <<"EOF"
+This is a multiline here document
+terminated by EOF on a line by itself
+EOF
+
+
+# @@PLEAC@@_1.1
+value = string[offset,count]
+value = string[offset..-1]
+
+string[offset,count] = newstring
+string[offset..-1] = newtail
+
+# in Ruby we can also specify intervals by their two offsets
+value = string[offset..offs2]
+string[offset..offs2] = newstring
+
+leading, s1, s2, trailing = data.unpack("A5 x3 A8 A8 A*")
+
+fivers = string.unpack("A5" * (string.length/5))
+
+chars = string.unpack("A1" * string.length)
+
+string = "This is what you have"
+# +012345678901234567890 Indexing forwards (left to right)
+# 109876543210987654321- Indexing backwards (right to left)
+# note that 0 means 10 or 20, etc. above
+
+first = string[0, 1] # "T"
+start = string[5, 2] # "is"
+rest = string[13..-1] # "you have"
+last = string[-1, 1] # "e"
+end_ = string[-4..-1] # "have"
+piece = string[-8, 3] # "you"
+
+string[5, 2] = "wasn't" # change "is" to "wasn't"
+string[-12..-1] = "ondrous" # "This wasn't wondrous"
+string[0, 1] = "" # delete first character
+string[-10..-1] = "" # delete last 10 characters
+
+if string[-10..-1] =~ /pattern/
+ puts "Pattern matches in last 10 characters"
+end
+
+string[0, 5].gsub!(/is/, 'at')
+
+a = "make a hat"
+a[0, 1], a[-1, 1] = a[-1, 1], a[0, 1]
+
+a = "To be or not to be"
+b = a.unpack("x6 A6")
+
+b, c = a.unpack("x6 A2 X5 A2")
+puts "#{b}\n#{c}\n"
+
# Build an unpack template from 1-based column positions: one "A<width> "
# field per position (width = distance from the previous position, starting
# at column 1), followed by "A*" for the remainder.
def cut2fmt(*args)
  previous = 1
  fields = args.map do |column|
    width = column - previous
    previous = column
    "A#{width} "
  end
  fields.join + "A*"
end
+
+fmt = cut2fmt(8, 14, 20, 26, 30)
+
+
+# @@PLEAC@@_1.2
+# careful! "b is true" doesn't mean "b != 0" (0 is true in Ruby)
+# thus no problem of "defined" later since only nil is false
+# the following sets to `c' if `b' is nil or false
+a = b || c
+
+# if you need Perl's behaviour (setting to `c' if `b' is 0) the most
+# effective way is to use Numeric#nonzero? (thanks to Dave Thomas!)
+a = b.nonzero? || c
+
+# you will still want to use defined? in order to test
+# for scope existence of a given object
+a = defined?(b) ? b : c
+
+dir = ARGV.shift || "/tmp"
+
+
+# @@PLEAC@@_1.3
+v1, v2 = v2, v1
+
+alpha, beta, production = %w(January March August)
+alpha, beta, production = beta, production, alpha
+
+
+# @@PLEAC@@_1.4
+num = char[0]
+char = num.chr
+
+# Ruby also supports having a char from character constant
+num = ?r
+
+char = sprintf("%c", num)
+printf("Number %d is character %c\n", num, num)
+
+ascii = string.unpack("C*")
+string = ascii.pack("C*")
+
+hal = "HAL"
+ascii = hal.unpack("C*")
+# We can't use Array#each since we can't mutate a Fixnum
+ascii.collect! { |i|
+ i + 1 # add one to each ASCII value
+}
+ibm = ascii.pack("C*")
+puts ibm
+
+
+# @@PLEAC@@_1.5
+array = string.split('')
+
+array = string.unpack("C*")
+
+string.scan(/./) { |b|
+ # do something with b
+}
+
+string = "an apple a day"
+print "unique chars are: ", string.split('').uniq.sort, "\n"
+
+sum = 0
+for ascval in string.unpack("C*") # or use Array#each for a pure OO style :)
+ sum += ascval
+end
+puts "sum is #{sum & 0xffffffff}" # since Ruby will go Bignum if necessary
+
+# @@INCLUDE@@ include/ruby/slowcat.rb
+
+
+# @@PLEAC@@_1.6
+revbytes = string.reverse
+
+revwords = string.split(" ").reverse.join(" ")
+
+revwords = string.split(/(\s+)/).reverse.join
+
+# using the fact that IO is Enumerable, you can directly "select" it
+long_palindromes = File.open("/usr/share/dict/words").
+ select { |w| w.chomp!; w.reverse == w && w.length > 5 }
+
+
+# @@PLEAC@@_1.7
# Expand tabs to the next multiple-of-8 column, one run of tabs at a time.
# The pattern has to be a Regexp: a String pattern is matched literally, so
# "\t+" would only ever match a tab followed by a plus sign.
while string.sub!(/\t+/) { ' ' * ($&.length * 8 - $`.length % 8) }
end
+
+
+# @@PLEAC@@_1.8
+'You owe #{debt} to me'.gsub(/\#{(\w+)}/) { eval($1) }
+
+rows, cols = 24, 80
+text = %q(I am #{rows} high and #{cols} long)
+text.gsub!(/\#{(\w+)}/) { eval("#{$1}") }
+puts text
+
+'I am 17 years old'.gsub(/\d+/) { 2 * $&.to_i }
+
+
+# @@PLEAC@@_1.9
+e = "bo peep".upcase
+e.downcase!
+e.capitalize!
+
+"thIS is a loNG liNE".gsub!(/\w+/) { $&.capitalize }
+
+
+# @@PLEAC@@_1.10
+"I have #{n+1} guanacos."
+print "I have ", n+1, " guanacos."
+
+
+# @@PLEAC@@_1.11
+var = <<'EOF'.gsub(/^\s+/, '')
+ your text
+ goes here
+EOF
+
+
+# @@PLEAC@@_1.12
+string = "Folding and splicing is the work of an editor,\n"+
+ "not a mere collection of silicon\n"+
+ "and\n"+
+ "mobile electrons!"
+
# Re-flow the words of +str+ into lines joined by "\n". A word starts a new
# line when appending it (not counting the separating blank) would reach
# +max_size+ characters. A word that is itself that long gets a line of its
# own; it no longer leaves an empty line in front of it.
def wrap(str, max_size)
  lines = []
  line = ''
  str.split.each do |word|
    if !line.empty? && (line + word).length >= max_size
      lines.push(line)
      line = ''
    end
    line += line.empty? ? word : ' ' + word
  end
  lines.push(line).join("\n")
end
+
+print wrap(string, 20)
#=> Folding and splicing
#=> is the work of an
#=> editor, not a mere
#=> collection of
#=> silicon and mobile
#=> electrons!
+
+
+# @@PLEAC@@_1.13
+string = %q(Mom said, "Don't do that.")
+string.gsub(/['"]/) { '\\'+$& }
+string.gsub(/['"]/, '\&\&')
+string.gsub(/[^A-Z]/) { '\\'+$& }
+"is a test!".gsub(/\W/) { '\\'+$& } # no function like quotemeta?
+
+
+# @@PLEAC@@_1.14
+string.strip!
+
+
+# @@PLEAC@@_1.15
# Split one line of comma-separated values into its fields.
# A field is either double-quoted (backslash escapes are kept as written,
# the surrounding quotes are dropped) or a run of non-comma characters.
# An empty unquoted field - including the one after a trailing comma - is
# returned as nil instead of being dropped.
def parse_csv(text)
  matches = text.scan(/"([^\"\\]*(?:\\.[^\"\\]*)*)",?|([^,]+),?|,/)
  # Each match is [quoted, bare]; both are nil for a lone comma.
  fields = matches.map { |quoted, bare| quoted || bare }
  fields << nil if text[-1, 1] == ','
  fields
end
+
+line = %q<XYZZY,"","O'Reilly, Inc","Wall, Larry","a \"glug\" bit,",5,"Error, Core Dumped">
+fields = parse_csv(line)
+fields.each_with_index { |v,i|
+ print "#{i} : #{v}\n";
+}
+
+
+# @@PLEAC@@_1.16
+# Use the soundex.rb Library from Michael Neumann.
+# http://www.s-direktnet.de/homepages/neumann/rb_prgs/Soundex.rb
+require 'Soundex'
+
+code = Text::Soundex.soundex(string)
+codes = Text::Soundex.soundex(array)
+
# Stand-in for getpwent(): reads /etc/passwd and returns one
# [first field, fifth field] pair per line, i.e. the user name and the full
# name. Lines starting with "#" are skipped.
def login_names
  entries = []
  File.foreach("/etc/passwd") do |line|
    next if line =~ /^#/
    fields = line.split(":")
    entries << [fields[0], fields[4]]
  end
  entries
end
+
puts "Lookup user: "
user = STDIN.gets
# gets returns nil at end of input, so test before calling chomp! on it.
exit unless user
user.chomp!
name_code = Text::Soundex.soundex(user)

splitter = Regexp.new('(\w+)[^,]*\b(\w+)')
for username, fullname in login_names do
  # Skip entries whose full name holds no first/last name to compare.
  names = splitter.match(fullname.to_s)
  next unless names
  firstname, lastname = names[1,2]
  # The operators end each line so that the condition continues; a line
  # that starts with || is not a continuation of the previous one.
  if name_code == Text::Soundex.soundex(username) ||
     name_code == Text::Soundex.soundex(firstname) ||
     name_code == Text::Soundex.soundex(lastname)
    puts "#{username}: #{firstname} #{lastname}"
  end
end
+
+
+# @@PLEAC@@_1.17
+# @@INCLUDE@@ include/ruby/fixstyle.rb
+
+
+# @@PLEAC@@_1.18
+# @@INCLUDE@@ include/ruby/psgrep.rb
+
+
+# @@PLEAC@@_2.1
+# Matz tells that you can use Integer() for strict checked conversion.
+Integer("abc")
+#=> `Integer': invalid value for Integer: "abc" (ArgumentError)
+Integer("567")
+#=> 567
+
+# You may use Float() for floating point stuff
+Integer("56.7")
+#=> `Integer': invalid value for Integer: "56.7" (ArgumentError)
+Float("56.7")
+#=> 56.7
+
+# You may also use a regexp for that
+if string =~ /^[+-]?\d+$/
+ p 'is an integer'
+else
+ p 'is not'
+end
+
+if string =~ /^-?(?:\d+(?:\.\d*)?|\.\d+)$/
+ p 'is a decimal number'
+else
+ p 'is not'
+end
+
+
+# @@PLEAC@@_2.2
# equal(num1, num2, accuracy) : returns true if num1 and num2 have the same
# text when both are formatted with "%.<accuracy>g" (for %g the precision is
# the number of significant digits).
def equal(i, j, a)
  pattern = "%.#{a}g"
  format(pattern, i) == format(pattern, j)
end
+
+wage = 536 # $5.36/hour
+week = 40 * wage # $214.40
+printf("One week's wage is: \$%.2f\n", week/100.0)
+
+
+# @@PLEAC@@_2.3
+num.round # rounds to integer
+
+a = 0.255
+b = sprintf("%.2f", a)
+print "Unrounded: #{a}\nRounded: #{b}\n"
+printf "Unrounded: #{a}\nRounded: %.2f\n", a
+
+print "number\tint\tfloor\tceil\n"
+a = [ 3.3 , 3.5 , 3.7, -3.3 ]
+for n in a
+ printf("% .1f\t% .1f\t% .1f\t% .1f\n", # at least I don't fake my output :)
+ n, n.to_i, n.floor, n.ceil)
+end
+
+
+# @@PLEAC@@_2.4
# Binary digits of +n+ as a string: the number is packed as a 32-bit
# big-endian value, unpacked as 32 bit characters, and leading zeros are
# stripped (a single "0" remains for zero).
def dec2bin(n)
  bits = [n].pack("N").unpack("B32").first
  bits.sub(/^0+(?=\d)/, '')
end
+
# Value of the binary digits in +n+ (string or number): its text is
# left-padded with zeros, cut to the last 32 characters, packed as bits and
# read back as a 32-bit big-endian number.
def bin2dec(n)
  padded = ("0" * 32 + n.to_s)[-32..-1]
  [padded].pack("B32").unpack("N").first
end
+
+
+# @@PLEAC@@_2.5
+for i in x .. y
+ # i is set to every integer from x to y, inclusive
+end
+
+x.step(y,7) { |i|
+ # i is set to every integer from x to y, stepsize = 7
+}
+
+print "Infancy is: "
+(0..2).each { |i|
+ print i, " "
+}
+print "\n"
+
+
+# @@PLEAC@@_2.6
# Conversion methods added to the Integer class, so that a roman numeral is
# merely another representation of an ordinary number.
class Integer

  # Numeral symbols with their values, largest first; the subtractive pairs
  # (CM, CD, XC, XL, IX, IV) are listed as symbols of their own.
  @@romanlist = [["M", 1000],
                 ["CM", 900],
                 ["D",  500],
                 ["CD", 400],
                 ["C",  100],
                 ["XC",  90],
                 ["L",   50],
                 ["XL",  40],
                 ["X",   10],
                 ["IX",   9],
                 ["V",    5],
                 ["IV",   4],
                 ["I",    1]]

  # Roman numeral of the receiver: greedily take the largest symbol that
  # still fits. Zero and negative numbers give an empty string.
  def to_roman
    rest = self
    @@romanlist.inject("") do |numeral, (sym, num)|
      until rest < num
        rest -= num
        numeral << sym
      end
      numeral
    end
  end

  # Value of a roman numeral (case-insensitive): repeatedly strip the
  # largest symbol found at the front and add up the values. Text that
  # matches no symbol is ignored.
  def self.from_roman(roman)
    rest = roman.upcase
    total = 0
    @@romanlist.each do |sym, num|
      while rest[0, sym.length] == sym
        total += num
        rest.slice!(0, sym.length)
      end
    end
    total
  end

end
+
+
+roman_fifteen = 15.to_roman
+puts "Roman for fifteen is #{roman_fifteen}"
+i = Integer.from_roman(roman_fifteen)
+puts "Converted back, #{roman_fifteen} is #{i}"
+
+# check
+for i in (1..3900)
+ r = i.to_roman
+ j = Integer.from_roman(r)
+ if i != j
+ puts "error: #{i} : #{r} - #{j}"
+ end
+end
+
+
+# @@PLEAC@@_2.7
+random = rand(y-x+1)+x
+
# Eight characters drawn at random from letters, digits and a few symbols.
# chars[index, 1] yields a one-character String on every Ruby version, so
# the pieces can simply be joined; indexing with a single integer returns a
# character code on Ruby 1.8 only, which is what pack("C*") relied on.
chars = ["A".."Z","a".."z","0".."9"].collect { |r| r.to_a }.join + %q(!@$%^&*)
password = (1..8).collect { chars[rand(chars.size), 1] }.join
+
+
+# @@PLEAC@@_2.8
+srand # uses a combination of the time, the process id, and a sequence number
+srand(val) # for repeatable behaviour
+
+
+# @@PLEAC@@_2.9
+# from the randomr lib:
+# http://raa.ruby-lang.org/project/randomr/
# ----> http://raa.ruby-lang.org/project/randomr/
+
+require 'random/mersenne_twister'
+mers = Random::MersenneTwister.new 123456789
+puts mers.rand(0) # 0.550321932544541
+puts mers.rand(10) # 2
+
+# using online sources of random data via the realrand package:
+# http://raa.ruby-lang.org/project/realrand/
+# **Note**
+# The following online services are used in this package:
+# http://www.random.org - source: atmospheric noise
+# http://www.fourmilab.ch/hotbits - source: radioactive decay timings
+# http://random.hd.org - source: entropy from local and network noise
+# Please visit the sites and respect the rules of each service.
+
+require 'random/online'
+
+generator1 = Random::RandomOrg.new
+puts generator1.randbyte(5).join(",")
+puts generator1.randnum(10, 1, 6).join(",") # Roll dice 10 times.
+
+generator2 = Random::FourmiLab.new
+puts generator2.randbyte(5).join(",")
+# randnum is not supported.
+
+generator3 = Random::EntropyPool.new
+puts generator3.randbyte(5).join(",")
+# randnum is not supported.
+
+
+# @@PLEAC@@_2.10
# Return a pair of normally distributed random numbers (mean 0, standard
# deviation 1) using the polar method: draw points in the square
# [-1, 1) x [-1, 1) until one lies strictly inside the unit circle and is
# not the origin, then scale its coordinates.
def gaussian_rand
  u1 = u2 = w = nil
  loop do
    u1 = 2 * rand() - 1
    u2 = 2 * rand() - 1
    w = u1*u1 + u2*u2
    # w == 0 has to be rejected as well: it would lead to log(0) and a
    # division by zero below.
    break if w > 0 && w < 1
  end
  w = Math.sqrt((-2*Math.log(w))/w)
  [ u2*w, u1*w ]
end
+
+mean = 25
+sdev = 2
+salary = gaussian_rand[0] * sdev + mean
+printf("You have been hired at \$%.2f\n", salary)
+
+
+# @@PLEAC@@_2.11
# Convert an angle from degrees to radians.
def deg2rad(d)
  half_turns = d / 180.0
  half_turns * Math::PI
end
+
# Convert an angle from radians to degrees.
def rad2deg(r)
  half_turns = r / Math::PI
  half_turns * 180
end
+
+
+# @@PLEAC@@_2.12
+sin_val = Math.sin(angle)
+cos_val = Math.cos(angle)
+tan_val = Math.tan(angle)
+
# Inverse trigonometric and hyperbolic functions expressed through atan2 and
# exp.
# NOTE(review): current Ruby versions already ship Math.asin, acos, atan,
# sinh, cosh and tanh; on those versions the definitions below replace the
# built-in ones - confirm that this is still wanted.
module Math
  class << self
    def asin(x)
      adjacent = sqrt(1 - x**2)
      atan2(x, adjacent)
    end

    def acos(x)
      opposite = sqrt(1 - x**2)
      atan2(opposite, x)
    end

    def atan(x)
      atan2(x, 1)
    end

    def sinh(x)
      growing = exp(x)
      decaying = exp(-x)
      (growing - decaying) / 2
    end

    def cosh(x)
      growing = exp(x)
      decaying = exp(-x)
      (growing + decaying) / 2
    end

    def tanh(x)
      sinh(x) / cosh(x)
    end
  end
end
+
+# The support for Complex numbers is not built-in
+y = Math.acos(3.7)
+#=> in `sqrt': square root for negative number (ArgumentError)
+
+# There is an implementation of Complex numbers in 'complex.rb' in current
+# Ruby distro, but it doesn't support atan2 with complex args, so it doesn't
+# solve this problem.
+
+
+# @@PLEAC@@_2.13
+log_e = Math.log(val)
+log_10 = Math.log10(val)
+
# Logarithm of +val+ to the given +base+, computed as a quotient of natural
# logarithms.
def log_base(base, val)
  numerator = Math.log(val)
  numerator / Math.log(base)
end
+
+answer = log_base(10, 10_000)
+puts "log10(10,000) = #{answer}"
+
+
+# @@PLEAC@@_2.14
+require 'matrix.rb'
+
+a = Matrix[[3, 2, 3], [5, 9, 8]]
+b = Matrix[[4, 7], [9, 3], [8, 1]]
+c = a * b
+
+a.row_size
+a.column_size
+
+c.det
+a.transpose
+
+
+# @@PLEAC@@_2.15
+require 'complex.rb'
+require 'rational.rb'
+
+a = Complex(3, 5) # 3 + 5i
+b = Complex(2, -2) # 2 - 2i
+puts "c = #{a*b}"
+
+c = a * b
+d = 3 + 4*Complex::I
+
+printf "sqrt(#{d}) = %s\n", Math.sqrt(d)
+
+
+# @@PLEAC@@_2.16
+number = hexadecimal.hex
+number = octal.oct
+
+print "Gimme a number in decimal, octal, or hex: "
+num = gets.chomp
+exit unless defined?(num)
+num = num.oct if num =~ /^0/ # does both oct and hex
+printf "%d %x %o\n", num, num, num
+
+print "Enter file permission in octal: "
+permissions = gets.chomp
+raise "Exiting ...\n" unless defined?(permissions)
+puts "The decimal value is #{permissions.oct}"
+
+
+# @@PLEAC@@_2.17
+# Returns n as a String with commas between every three digits of the
+# integer part; anything from the decimal point onwards is left untouched.
+def commify(n)
+  whole, fraction = n.to_s.match(/([^\.]*)(\..*)?/).captures
+  digits = whole.reverse
+  # Work on the reversed digits so groups are counted from the units end;
+  # each pass inserts the next comma until nothing is left to change.
+  nil while digits.gsub!(/(,|\.|^)(\d{3})(\d)/, '\1\2,\3')
+  digits.reverse + fraction.to_s
+end
+
+
+# @@PLEAC@@_2.18
+printf "It took %d hour%s\n", time, time == 1 ? "" : "s"
+
+# dunno if an equivalent to Lingua::EN::Inflect exists...
+
+
+# @@PLEAC@@_2.19
+#-----------------------------
+#!/usr/bin/ruby
+# bigfact - calculating prime factors
+# Factorizes orig by trial division.
+#
+# Returns a Hash mapping each prime factor to its exponent (missing keys
+# read as 0). The Hash is empty when no proper factor was found, which the
+# caller reports as "PRIME".
+def factorize(orig)
+  factors = Hash.new(0)
+  remaining = orig
+  candidate = 2
+  square = 4 # candidate squared, maintained incrementally
+  while square <= remaining
+    while (remaining % candidate).zero?
+      remaining /= candidate
+      factors[candidate] += 1
+    end
+    # (c + 1)**2 == c**2 + 2*c + 1, so no multiplication is needed
+    square += 2 * candidate + 1
+    candidate += 1
+  end
+
+  # Whatever is left is one last prime factor, unless nothing was divided out.
+  factors[remaining] += 1 unless remaining == 1 || remaining == orig
+  factors
+end
+
+# Prints one report line for orig: the number left-justified in a
+# 10-character field, followed by "PRIME" when factorcount is empty or by
+# the factors in ascending order, written as "factor**exponent" when the
+# exponent is greater than 1.
+def printfactorhash(orig, factorcount)
+  line = format("%-10d ", orig)
+  if factorcount.empty?
+    line << "PRIME"
+  else
+    # Hash#sort orders by key, i.e. numerically by factor
+    factorcount.sort.each do |factor, exponent|
+      line << factor.to_s
+      line << "**" << exponent.to_s if exponent > 1
+      line << " "
+    end
+  end
+  puts line
+end
+
+for arg in ARGV
+ n = arg.to_i
+ mfactors = factorize(n)
+ printfactorhash(n, mfactors)
+end
+#-----------------------------
+
+
+# @@PLEAC@@_3.0
+puts Time.now
+
+print "Today is day ", Time.now.yday, " of the current year.\n"
+print "Today is day ", Time.now.day, " of the current month.\n"
+
+
+# @@PLEAC@@_3.1
+day, month, year = Time.now.day, Time.now.month, Time.now.year
+# or
+day, month, year = Time.now.to_a[3..5]
+
+tl = Time.now.localtime
+printf("The current date is %04d %02d %02d\n", tl.year, tl.month, tl.day)
+
+Time.now.localtime.strftime("%Y-%m-%d")
+
+
+# @@PLEAC@@_3.2
+Time.local(year, month, day, hour, minute, second).tv_sec
+Time.gm(year, month, day, hour, minute, second).tv_sec
+
+
+# @@PLEAC@@_3.3
+sec, min, hour, day, month, year, wday, yday, isdst, zone = Time.at(epoch_secs).to_a
+
+
+# @@PLEAC@@_3.4
+when_ = now + difference # now -> Time ; difference -> Numeric (delta in seconds)
+then_ = now - difference
+
+
+# @@PLEAC@@_3.5
+# Epoch timestamps (in seconds) of the two events being compared.
+bree = 361535725
+nat = 96201950
+
+difference = bree - nat
+puts "There were #{difference} seconds between Nat and Bree"
+
+# Peel off one unit at a time; after each step `difference` holds the
+# remaining amount expressed in the next larger unit.
+difference, seconds = difference.divmod(60)
+difference, minutes = difference.divmod(60)
+difference, hours = difference.divmod(24)
+weeks, days = difference.divmod(7)
+
+puts "(#{weeks} weeks, #{days} days, #{hours}:#{minutes}:#{seconds})"
+
+
+# @@PLEAC@@_3.6
+monthday, weekday, yearday = date.mday, date.wday, date.yday
+
+# AFAIK the week number is not just a division since week boundaries are on sundays
+weeknum = d.strftime("%U").to_i + 1
+
+year = 1981
+month = "jun" # or `6' if you want to emulate a broken language
+day = 16
+t = Time.mktime(year, month, day)
+print "#{month}/#{day}/#{year} was a ", t.strftime("%A"), "\n"
+
+
+# @@PLEAC@@_3.7
+yyyy, mm, dd = $1, $2, $3 if "1998-06-25" =~ /(\d+)-(\d+)-(\d+)/
+
+epoch_seconds = Time.mktime(yyyy, mm, dd).tv_sec
+
+# dunno an equivalent to Date::Manip#ParseDate
+
+
+# @@PLEAC@@_3.8
+string = Time.at(epoch_secs)
+Time.at(1234567890).gmtime # gives: Fri Feb 13 23:31:30 UTC 2009
+
+time = Time.mktime(1973, "jan", 18, 3, 45, 50)
+print "In localtime it gives: ", time.localtime, "\n"
+
+
+# @@PLEAC@@_3.9
+# Ruby provides micro-seconds in Time object
+Time.now.usec
+
+# Ruby gives the seconds in floating format when subtracting two Time objects
+before = Time.now
+line = gets
+elapsed = Time.now - before
+puts "You took #{elapsed} seconds."
+
+# On my Celeron-400 with Linux-2.2.19-14mdk, average for three execs are:
+# This Ruby version: average 0.00321 sec
+# Cookbook's Perl version: average 0.00981 sec
+size = 500
+number_of_times = 100
+total_time = 0
+number_of_times.times {
+ # populate array
+ array = []
+ size.times { array << rand }
+ # sort it
+ begin_ = Time.now
+ array.sort!
+ time = Time.now - begin_
+ total_time += time
+}
+printf "On average, sorting %d random numbers takes %.5f seconds\n",
+ size, (total_time/Float(number_of_times))
+
+
+# @@PLEAC@@_3.10
+sleep(0.005) # Ruby is definitely not as broken as Perl :)
+# (may be interrupted by sending the process a SIGALRM)
+
+
+# @@PLEAC@@_3.11
+#!/usr/bin/ruby -w
+# hopdelta - feed mail header, produce lines
+# showing delay at each hop.
+require 'time'
+# Reads one mail header and reports, for each "Received:" hop from oldest
+# to newest, who handed the message to whom and how long the hop took.
+class MailHopDelta
+
+  # mail - String holding the complete header. It must contain a "From"
+  #        line with an address and a "Date:" line; otherwise the nil
+  #        match raises NoMethodError.
+  def initialize(mail)
+    @head = mail.gsub(/\n\s+/,' ')            # unfold continuation lines
+    @topline = %w-Sender Recipient Time Delta-
+    @start_from = mail.match(/^From.*\@([^\s>]*)/)[1]
+    @date = Time.parse(mail.match(/^Date:\s+(.*)/)[1])
+  end
+
+  # Formats a four-element Array as one fixed-width report row.
+  def out(line)
+    "%-20.20s %-20.20s %-20.20s %s" % line
+  end
+
+  # Formats a Time for the "Time" column.
+  def hop_date(day)
+    day.strftime("%I:%M:%S %Y/%m/%d")
+  end
+
+  # Prints the report to standard output. Hops whose timestamp is missing
+  # or unparseable are reported with a warning and skipped.
+  def puts_hops
+    puts out(@topline)
+    puts out(['Start', @start_from, hop_date(@date),''])
+    @head.split(/\n/).reverse.grep(/^Received:/).each do |hop|
+      hop.gsub!(/\bon (.*?) (id.*)/,'; \1')
+      now = parse_hop_time(hop)
+      unless now
+        warn "Bad received line: #{hop}"
+        next
+      end
+      from = $+ if hop =~ /from\s+(\S+)|\((.*?)\)/
+      by = $1 if hop =~ /by\s+(\S+\.\S+)/
+      delta = now - @date
+      puts out([from, by, hop_date(now), hop_time(delta)])
+      @date = now
+    end
+  end
+
+  # Renders a (possibly negative) number of seconds as "Ns Nm Nh Nd",
+  # omitting the minute, hour and day fields that are zero.
+  def hop_time(secs)
+    sign = secs < 0 ? -1 : 1
+    days, secs = secs.abs.divmod(60 * 60 * 24)
+    hours,secs = secs.abs.divmod(60 * 60)
+    mins, secs = secs.abs.divmod(60)
+    rtn = "%3ds" % [secs * sign]
+    rtn << "%3dm" % [mins * sign] if mins != 0
+    rtn << "%3dh" % [hours * sign] if hours != 0
+    rtn << "%3dd" % [days * sign] if days != 0
+    rtn
+  end
+
+  # Extracts the timestamp following the ';' of a Received line.
+  # Returns a local Time, or nil when there is no ';' part or the text
+  # after it is not a date Time.parse understands.
+  def parse_hop_time(hop)
+    stamp = hop[/;\s+(.*)$/, 1]
+    stamp && Time.parse(stamp).localtime
+  rescue ArgumentError
+    nil
+  end
+  private :parse_hop_time
+end
+
+$/ = ""
+mail = MailHopDelta.new(ARGF.gets).puts_hops
+
+
+# @@PLEAC@@_4.0
+single_level = [ "this", "that", "the", "other" ]
+
+# Ruby directly supports nested arrays
+double_level = [ "this", "that", [ "the", "other" ] ]
+still_single_level = [ "this", "that", [ "the", "other" ] ].flatten
+
+
+# @@PLEAC@@_4.1
+a = [ "quick", "brown", "fox" ]
+a = %w(Why are you teasing me?)
+
+lines = <<"END_OF_HERE_DOC".gsub(/^\s*(.+)/, '\1')
+ The boy stood on the burning deck,
+ It was as hot as glass.
+END_OF_HERE_DOC
+
+bigarray = IO.readlines("mydatafile").collect { |l| l.chomp }
+
+name = "Gandalf"
+banner = %Q(Speak, #{name}, and welcome!)
+
+host_info = `host #{his_host}`
+
+%x(ps #{$$})
+
+banner = 'Costs only $4.95'.split(' ')
+
+rax = %w! ( ) < > { } [ ] !
+
+
+# @@PLEAC@@_4.2
+# Joins the items of a into an English series: "" for no items, the item
+# itself for one, "x and y" for two, "x, y, and z" for three or more.
+def commify_series(a)
+  case a.size
+  when 0 then ''
+  when 1 then a[0]
+  when 2 then a.join(' and ')
+  else a[0..-2].join(', ') + ', and ' + a[-1]
+  end
+end
+
+array = [ "red", "yellow", "green" ]
+
+print "I have ", array, " marbles\n"
+# -> I have redyellowgreen marbles
+
+# But unlike Perl:
+print "I have #{array} marbles\n"
+# -> I have redyellowgreen marbles
+# So, needs:
+print "I have #{array.join(' ')} marbles\n"
+# -> I have red yellow green marbles
+
+# Like the simple commify_series, but separates the items with "; "
+# instead of ", " as soon as any item itself contains a comma.
+def commify_series(a)
+  sepchar = a.select { |item| item =~ /,/ }.empty? ? ', ' : '; '
+  case a.size
+  when 0 then ''
+  when 1 then a[0]
+  when 2 then a.join(' and ')
+  else a[0..-2].join(sepchar) + sepchar + 'and ' + a[-1]
+  end
+end
+
+
+# @@PLEAC@@_4.3
+# (note: AFAIK Ruby doesn't allow gory change of Array length)
+# grow the array by assigning nil to past the end of array
+ary[new_size-1] = nil
+# shrink the array by slicing it down
+ary.slice!(new_size..-1)
+# init the array with given size
+Array.new(number_of_elems)
+# assign to an element past the original end enlarges the array
+ary[index_new_last_elem] = value
+
+# Prints the number of elements in a, followed by the element at index 3.
+def what_about_that_array(a)
+  $stdout.print("The array now has #{a.size} elements.\n")
+  # The index of the last element is not really interesting in Ruby
+  $stdout.print("Element #3 is `#{a[3]}'.\n")
+end
+people = %w(Crosby Stills Nash Young)
+what_about_that_array(people)
+
+
+# @@PLEAC@@_4.4
+# OO style
+bad_users.each { |user|
+ complain(user)
+}
+# or, functional style
+for user in bad_users
+ complain(user)
+end
+
+for var in ENV.keys.sort
+ puts "#{var}=#{ENV[var]}"
+end
+
+for user in all_users
+ disk_space = get_usage(user)
+ if (disk_space > MAX_QUOTA)
+ complain(user)
+ end
+end
+
+for l in IO.popen("who").readlines
+ print l if l =~ /^gc/
+end
+
+# we can mimic the obfuscated Perl way
+while fh.gets # $_ is set to the line just read
+ chomp # $_ has a trailing \n removed, if it had one
+ split.each { |w| # $_ is split on whitespace
+ # but $_ is not set to each chunk as in Perl
+ print w.reverse
+ }
+end
+# ...or use a cleaner way
+for l in fh.readlines
+ l.chomp.split.each { |w| print w.reverse }
+end
+
+# same drawback as in problem 1.4, we can't mutate a Numeric...
+array.collect! { |v| v - 1 }
+
+a = [ .5, 3 ]; b = [ 0, 1 ]
+for ary in [ a, b ]
+ ary.collect! { |v| v * 7 }
+end
+puts "#{a.join(' ')} #{b.join(' ')}"
+
+# we can mutate Strings, cool; we need a trick for the scalar
+for ary in [ [ scalar ], array, hash.values ]
+ ary.each { |v| v.strip! } # String#strip rules :)
+end
+
+
+# @@PLEAC@@_4.5
+# not relevant in Ruby since we have always references
+for item in array
+ # do something with item
+end
+
+
+# @@PLEAC@@_4.6
+unique = list.uniq
+
+# generate a list of users logged in, removing duplicates
+users = `who`.collect { |l| l =~ /(\w+)/; $1 }.sort.uniq
+puts("users logged in: #{commify_series(users)}") # see 4.2 for commify_series
+
+
+# @@PLEAC@@_4.7
+a - b
+# [ 1, 1, 2, 2, 3, 3, 3, 4, 5 ] - [ 1, 2, 4 ] -> [3, 5]
+
+
+# @@PLEAC@@_4.8
+union = a | b
+intersection = a & b
+difference = a - b
+
+
+# @@PLEAC@@_4.9
+array1.concat(array2)
+# if you will assign to another object, better use:
+new_ary = array1 + array2
+
+members = [ "Time", "Flies" ]
+initiates = [ "An", "Arrow" ]
+members += initiates
+
+members = [ "Time", "Flies" ]
+initiates = [ "An", "Arrow" ]
+members[2,0] = [ "Like", initiates ].flatten
+
+members[0] = "Fruit"
+members[3,2] = "A", "Banana"
+
+
+# @@PLEAC@@_4.10
+reversed = ary.reverse
+
+ary.reverse_each { |e|
+ # do something with e
+}
+
+descending = ary.sort.reverse
+descending = ary.sort { |a,b| b <=> a }
+
+
+# @@PLEAC@@_4.11
+# remove n elements from front of ary (shift n)
+front = ary.slice!(0, n)
+
+# remove n elements from the end of ary (pop n)
+end_ = ary.slice!(-n .. -1)
+
+# Extend Array with two-element counterparts of shift and pop.
+class Array
+  # Removes and returns the first two elements (fewer if the array is shorter).
+  def shift2
+    slice!(0, 2)
+  end
+
+  # Removes and returns the last two elements; returns nil when the array
+  # holds fewer than two elements.
+  def pop2
+    slice!(-2, 2)
+  end
+end
+
+friends = %w(Peter Paul Mary Jim Tim)
+this, that = friends.shift2
+
+beverages = %w(Dew Jolt Cola Sprite Fresca)
+pair = beverages.pop2
+
+
+# @@PLEAC@@_4.12
+# use Enumerable#detect (or the synonym Enumerable#find)
+highest_eng = employees.detect { |emp| emp.category == 'engineer' }
+
+
+# @@PLEAC@@_4.13
+# use Enumerable#select (or the synonym Enumerable#find_all)
+bigs = nums.select { |i| i > 1_000_000 }
+pigs = users.keys.select { |k| users[k] > 1e7 }
+
+matching = `who`.select { |u| u =~ /^gnat / }
+
+engineers = employees.select { |e| e.position == 'Engineer' }
+
+secondary_assistance = applicants.select { |a|
+ a.income >= 26_000 && a.income < 30_000
+}
+
+
+# @@PLEAC@@_4.14
+# normally you would have an array of Numeric (Float or
+# Fixnum or Bignum), so you would use:
+sorted = unsorted.sort
+# if you have strings representing Integers or Floats
+# you may specify another sort method:
+sorted = unsorted.sort { |a,b| a.to_f <=> b.to_f }
+
+# let's use the list of my own PID's
+`ps ux`.split("\n")[1..-1].
+ select { |i| i =~ /^#{ENV['USER']}/ }.
+ collect { |i| i.split[1] }.
+ sort { |a,b| a.to_i <=> b.to_i }.each { |i| puts i }
+puts "Select a process ID to kill:"
+pid = gets.chomp
+raise "Exiting ... \n" unless pid && pid =~ /^\d+$/
+Process.kill('TERM', pid.to_i)
+sleep 2
+Process.kill('KILL', pid.to_i)
+
+descending = unsorted.sort { |a,b| b.to_f <=> a.to_f }
+
+
+# @@PLEAC@@_4.15
+ordered = unordered.sort { |a,b| compare(a,b) }
+
+precomputed = unordered.collect { |e| [compute, e] }
+ordered_precomputed = precomputed.sort { |a,b| a[0] <=> b[0] }
+ordered = ordered_precomputed.collect { |e| e[1] }
+
+ordered = unordered.collect { |e| [compute, e] }.
+ sort { |a,b| a[0] <=> b[0] }.
+ collect { |e| e[1] }
+
+for employee in employees.sort { |a,b| a.name <=> b.name }
+ print employee.name, " earns \$ ", employee.salary, "\n"
+end
+
+# Beware! `0' is true in Ruby.
+# For chaining comparisons, you may use Numeric#nonzero?, which
+# returns num if num is not zero, nil otherwise
+sorted = employees.sort { |a,b| (a.name <=> b.name).nonzero? || b.age <=> a.age }
+
+users = []
+# getpwent is not wrapped in Ruby... let's fallback
+IO.readlines('/etc/passwd').each { |u| users << u.split(':') }
+users.sort! { |a,b| a[0] <=> b[0] }
+for user in users
+ puts user[0]
+end
+
+sorted = names.sort { |a,b| a[1, 1] <=> b[1, 1] }
+sorted = strings.sort { |a,b| a.length <=> b.length }
+
+# let's show only the compact version
+ordered = strings.collect { |e| [e.length, e] }.
+ sort { |a,b| a[0] <=> b[0] }.
+ collect { |e| e[1] }
+
+ordered = strings.collect { |e| [/\d+/.match(e)[0].to_i, e] }.
+ sort { |a,b| a[0] <=> b[0] }.
+ collect { |e| e[1] }
+
+print `cat /etc/passwd`.collect { |e| [e, e.split(':').indexes(3,2,0)].flatten }.
+ sort { |a,b| (a[1] <=> b[1]).nonzero? || (a[2] <=> b[2]).nonzero? || a[3] <=> b[3] }.
+ collect { |e| e[0] }
+
+
+# @@PLEAC@@_4.16
+circular.unshift(circular.pop) # the last shall be first
+circular.push(circular.shift) # and vice versa
+
+# Moves the first element of l to the end (in place) and returns it.
+def grab_and_rotate(l)
+  head = l.shift
+  l << head
+  head
+end
+
+processes = [1, 2, 3, 4, 5]
+while (1)
+ process = grab_and_rotate(processes)
+ puts "Handling process #{process}"
+ sleep 1
+end
+
+
+# @@PLEAC@@_4.17
+# Shuffles a in place with the Fisher-Yates algorithm: walking from the
+# last index down to 1, each element is swapped with a randomly chosen
+# element at or below its own index.
+def fisher_yates_shuffle(a)
+  (a.size - 1).downto(1) do |upper|
+    pick = rand(upper + 1)
+    next if pick == upper
+    a[upper], a[pick] = a[pick], a[upper]
+  end
+end
+
+# Shuffles a in place by swapping every position with a position chosen
+# at random from the whole array.
+def naive_shuffle(a)
+  (0...a.size).each do |pos|
+    other = rand(a.size)
+    a[pos], a[other] = a[other], a[pos]
+  end
+end
+
+
+# @@PLEAC@@_4.18
+#!/usr/bin/env ruby
+# example 4-2 words
+# words - gather lines, present in columns
+
+# Prints a word list in columns, filling each column from top to bottom
+# before moving on to the next one.
+class WordFormatter
+  # cols - number of characters available on one output line.
+  def initialize(cols)
+    @cols = cols
+  end
+
+  # Returns the length of the longest word in wordlist, but at least 1.
+  def maxlen(wordlist)
+    wordlist.inject(1) { |longest, word| word.length > longest ? word.length : longest }
+  end
+
+  # Writes wordlist to standard output, formatted into columns.
+  def output(wordlist)
+    collen = maxlen(wordlist) + 1
+    columns = @cols / collen
+    columns = 1 if columns == 0
+    rows = (wordlist.length + columns - 1) / columns
+    # Walk the grid row by row and pick the word that belongs in each cell.
+    0.upto(rows * columns - 1) do |item|
+      row, col = item.divmod(columns)
+      piece = wordlist[col * rows + row] || ""
+      if ((item + 1) % columns).zero?
+        # last cell of the row: no padding, end the line
+        print piece
+        puts
+      else
+        print piece.ljust(collen)
+      end
+    end
+    # nothing to finish up: the last cell always ends its line
+  end
+end
+
+# Returns the number of character columns of the terminal attached to
+# standard output (see PLEAC 15.4), or 80 when it cannot be determined.
+# Not portable: 0x5413 is the TIOCGWINSZ ioctl request number on Linux.
+def getWinCharWidth()
+  buf = "\0" * 8
+  $stdout.ioctl(0x5413, buf)
+  # The reply is four unsigned shorts: rows, columns, x pixels, y pixels.
+  _ws_row, ws_col = buf.unpack("S4")
+  ws_col && ws_col > 0 ? ws_col : 80
+rescue
+  # stdout is not a terminal, or the ioctl is unsupported on this platform
+  80
+end
+
+# main program
+cols = getWinCharWidth()
+formatter = WordFormatter.new(cols)
+words = readlines()
+words.collect! { |line|
+ line.chomp
+}
+formatter.output(words)
+
+
+# @@PLEAC@@_4.19
+# Ruby integers grow to arbitrary precision automatically, so no extra
+# big-number module is needed.
+#
+# Returns n! for an Integer n; 1 when n is zero or negative.
+def factorial(n)
+  (1..n).inject(1) { |product, k| product * k }
+end
+
+puts factorial(500)
+
+#---------------------------------------------------------
+# Example 4-3. tsc-permute
+# tsc_permute: permute each word of input
+# Prints every permutation of items, one per line, words joined by spaces.
+#
+# items - Array of words still to be placed.
+# perms - Array of words already placed (pass [] on the initial call).
+#
+# Elements are removed by position, so words that occur more than once in
+# items are each kept in the output.
+def permute(items, perms)
+  if items.empty?
+    puts perms.join(" ")
+    return
+  end
+  items.each_index do |idx|
+    rest = items.dup
+    chosen = rest.delete_at(idx)
+    permute(rest, [chosen] + perms)
+  end
+end
+# In ruby the main program must be after all definitions it is using
+permute(ARGV, [])
+
+#---------------------------------------------------------
+# mjd_permute: permute each word of input
+
+# Returns n! for an Integer n (1 when n is zero or negative), multiplying
+# from n downwards.
+def factorial(n)
+  product = 1
+  n.downto(1) { |k| product *= k }
+  product
+end
+
+# Memoizing factorial: a class variable holds the private cache of
+# results already computed, indexed by n.
+class Factorial
+  @@fact = [ 1 ]
+
+  # Returns n!, computing and caching any values not yet in the cache.
+  def self.compute(n)
+    @@fact[n] ||= n * compute(n - 1)
+  end
+end
+
+#---------------------------------------------------------
+# Example 4-4- mjd-permute
+# n2pat(n, length): produce the n-th pattern of the given length.
+#
+# The parameter must be a lower-case n, because a capitalized name would
+# be treated as a constant. length is the number of elements in the
+# resulting array, not the index of its last element (length - 1) as in
+# the Perl example.
+def n2pat(n, length)
+  (1..length).map do |radix|
+    digit = n % radix
+    n /= radix
+    digit
+  end
+end
+
+# pat2perm(pat): turn a pattern returned by n2pat() into a permutation
+# of the integers 0 .. pat.length - 1. The pattern is consumed: pat is
+# empty when the method returns.
+def pat2perm(pat)
+  pool = (0...pat.length).to_a
+  perm = []
+  until pat.empty?
+    perm << pool.slice!(pat.pop)
+  end
+  perm
+end
+
+# Returns the n-th permutation of the integers 0 .. len - 1.
+def n2perm(n, len)
+  pattern = n2pat(n, len)
+  pat2perm(pattern)
+end
+
+# In ruby the main program must be after all definitions
+while gets
+ data = split
+ # the perl solution has used $#data, which is length-1
+ num_permutations = Factorial.compute(data.length())
+ 0.upto(num_permutations - 1) do |i|
+ # in ruby we can not use an array as selector for an array
+ # but by exchanging the two arrays, we can use the collect method
+ # which returns an array with the result of all block invocations
+ permutation = n2perm(i, data.length).collect {
+ |j| data[j]
+ }
+ puts permutation.join(" ")
+ end
+end
+
+
+# @@PLEAC@@_5.0
+# Hash literals need "key => value" pairs; the old comma-separated form
+# { key, value, ... } is no longer accepted by the parser.
+age = { "Nat"   => 24,
+        "Jules" => 25,
+        "Josh"  => 17 }
+
+age["Nat"] = 24
+age["Jules"] = 25
+age["Josh"] = 17
+
+food_color = {
+ "Apple" => "red",
+ "Banana" => "yellow",
+ "Lemon" => "yellow",
+ "Carrot" => "orange"
+ }
+
+# In Ruby, you cannot avoid the double or simple quoting
+# while manipulating hashes
+
+
+# @@PLEAC@@_5.1
+hash[key] = value
+
+food_color["Raspberry"] = "pink"
+puts "Known foods:", food_color.keys
+
+
+# @@PLEAC@@_5.2
+# does hash have a value for key ?
+if (hash.has_key?(key))
+ # it exists
+else
+ # it doesn't
+end
+
+[ "Banana", "Martini" ].each { |name|
+ print name, " is a ", food_color.has_key?(name) ? "food" : "drink", "\n"
+}
+
+age = {}
+age['Toddler'] = 3
+age['Unborn'] = 0
+age['Phantasm'] = nil
+
+for thing in ['Toddler', 'Unborn', 'Phantasm', 'Relic']
+ print "#{thing}: "
+ print "Has-key " if age.has_key?(thing)
+ print "True " if age[thing]
+ print "Nonzero " if age[thing] && age[thing].nonzero?
+ print "\n"
+end
+
+#=>
+# Toddler: Has-key True Nonzero
+# Unborn: Has-key True
+# Phantasm: Has-key
+# Relic:
+
+# You use Hash#has_key? when you use Perl's exists -> it checks
+# for existence of a key in a hash.
+# All Numeric are "True" in ruby, so the test doesn't have the
+# same semantics as in Perl; you would use Numeric#nonzero? to
+# achieve the same semantics (false if 0, true otherwise).
+
+
+# @@PLEAC@@_5.3
+food_color.delete("Banana")
+
+
+# @@PLEAC@@_5.4
+hash.each { |key, value|
+ # do something with key and value
+}
+
+hash.each_key { |key|
+ # do something with key
+}
+
+food_color.each { |food, color|
+ puts "#{food} is #{color}"
+}
+
+food_color.each_key { |food|
+ puts "#{food} is #{food_color[food]}"
+}
+
+# IMO this demonstrates that OO style is by far more readable
+food_color.keys.sort.each { |food|
+ puts "#{food} is #{food_color[food]}."
+}
+
+#-----------------------------
+#!/usr/bin/ruby
+# countfrom - count number of messages from each sender
+
+# Default value is 0
+from = Hash.new(0)
+while gets
+ /^From: (.*)/ and from[$1] += 1
+end
+
+# More useful to sort by number of received mail by person
+from.sort {|a,b| b[1]<=>a[1]}.each { |v|
+ puts "#{v[1]}: #{v[0]}"
+}
+#-----------------------------
+
+
+# @@PLEAC@@_5.5
+# You may use the built-in 'inspect' method this way:
+p hash
+
+# Or do it the Cookbook way:
+hash.each { |k,v| puts "#{k} => #{v}" }
+
+# Sorted by keys
+hash.sort.each { |e| puts "#{e[0]} => #{e[1]}" }
+# Sorted by values
+hash.sort{|a,b| a[1]<=>b[1]}.each { |e| puts "#{e[0]} => #{e[1]}" }
+
+
+# @@PLEAC@@_5.7
+ttys = Hash.new
+for i in `who`
+ user, tty = i.split
+ (ttys[user] ||= []) << tty # see problems_ruby for more infos
+end
+ttys.keys.sort.each { |k|
+ puts "#{k}: #{commify_series(ttys[k])}" # from 4.2
+}
+
+
+# @@PLEAC@@_5.8
+surname = { "Mickey" => "Mantle", "Babe" => "Ruth" }
+# Hash#key does the reverse lookup (value -> first matching key); it
+# replaces Hash#index, which has been removed from Ruby.
+puts surname.key("Mantle")
+
+# If you really needed to 'invert' the whole hash, use Hash#invert
+
+#-----------------------------
+#!/usr/bin/ruby -w
+# foodfind - find match for food or color
+
+# The single command-line argument is either a food name or a color.
+given = ARGV.shift or raise "usage: foodfind food_or_color"
+
+color = {
+  "Apple" => "red",
+  "Banana" => "yellow",
+  "Lemon" => "yellow",
+  "Carrot" => "orange",
+}
+
+# Argument is a food: report its color.
+if color.has_key?(given)
+  puts "#{given} is a food with color #{color[given]}."
+end
+# Argument is a color: report the first food having it. Hash#key replaces
+# Hash#index, which has been removed from Ruby.
+if color.has_value?(given)
+  puts "#{color.key(given)} is a food with color #{given}."
+end
+#-----------------------------
+
+
+# @@PLEAC@@_5.9
+# Sorted by keys (Hash#sort gives an Array of pairs made of each key,value)
+food_color.sort.each { |f|
+ puts "#{f[0]} is #{f[1]}."
+}
+
+# Sorted by values
+food_color.sort { |a,b| a[1] <=> b[1] }.each { |f|
+ puts "#{f[0]} is #{f[1]}."
+}
+
+# Sorted by length of values
+food_color.sort { |a,b| a[1].length <=> b[1].length }.each { |f|
+ puts "#{f[0]} is #{f[1]}."
+}
+
+
+# @@PLEAC@@_5.10
+merged = a.clone.update(b) # because Hash#update changes object in place
+
+drink_color = { "Galliano" => "yellow", "Mai Tai" => "blue" }
+ingested_color = drink_color.clone.update(food_color)
+
+substance_color = {}
+for i in [ food_color, drink_color ]
+ i.each_key { |k|
+ if substance_color.has_key?(k)
+ puts "Warning: #{k} seen twice. Using the first definition."
+ next
+ end
+ substance_color[k] = 1
+ }
+end
+
+
+# @@PLEAC@@_5.11
+common = hash1.keys & hash2.keys
+
+this_not_that = hash1.keys - hash2.keys
+
+
+# @@PLEAC@@_5.12
+# no problem here, Ruby handles any kind of object for key-ing
+# (it takes Object#hash, which defaults to Object#id)
+
+
+# @@PLEAC@@_5.13
+# AFAIK, not possible in Ruby
+
+
+# @@PLEAC@@_5.14
+# Be careful, the following is possible only because Fixnum objects are
+# special (documentation says: there is effectively only one Fixnum object
+# instance for any given integer value).
+count = Hash.new(0)
+array.each { |e|
+ count[e] += 1
+}
+
+
+# @@PLEAC@@_5.15
+# Maps each person to his father. Written with "key => value" pairs: the
+# old comma-separated hash literal is no longer accepted by the parser.
+father = {
+  "Cain"      => "Adam",
+  "Abel"      => "Adam",
+  "Seth"      => "Adam",
+  "Enoch"     => "Cain",
+  "Irad"      => "Enoch",
+  "Mehujael"  => "Irad",
+  "Methusael" => "Mehujael",
+  "Lamech"    => "Methusael",
+  "Jabal"     => "Lamech",
+  "Jubal"     => "Lamech",
+  "Tubalcain" => "Lamech",
+  "Enos"      => "Seth",
+}
+
+while gets
+ chomp
+ begin
+ print $_, " "
+ end while $_ = father[$_]
+ puts
+end
+
+children = {}
+father.each { |k,v|
+ (children[v] ||= []) << k
+}
+while gets
+ chomp
+ puts "#{$_} begat #{(children[$_] || ['Nobody']).join(', ')}.\n"
+end
+
+includes = {}
+files.each { |f|
+ begin
+ for l in IO.readlines(f)
+ next unless l =~ /^\s*#\s*include\s*<([^>]+)>/
+ (includes[$1] ||= []) << f
+ end
+ rescue SystemCallError
+ $stderr.puts "#$! (skipping)"
+ end
+}
+
+include_free = includes.values.flatten.uniq - includes.keys
+
+
+# @@PLEAC@@_5.16
+# dutree - print sorted indented rendition of du output
+#% dutree
+#% dutree /usr
+#% dutree -a
+#% dutree -a /bin
+
+# A DuNode holds what is known about one directory (its path, its size
+# and its child nodes) and offers a few convenience methods.
+class DuNode
+
+  attr_reader :name
+  attr_accessor :size, :kids
+
+  # name - path of the directory as reported by du.
+  def initialize(name)
+    @name, @kids, @size = name, [], 0
+  end
+
+  # Comparison helper for sorting nodes by size; returns -1, 0 or 1.
+  def size_compare(node2)
+    size <=> node2.size
+  end
+
+  # Last path component (everything after the final "/").
+  def basename
+    name.sub(%r{.*/}, "")
+  end
+
+  # Path with its last "/component" removed, or nil when the name has no
+  # such trailing component.
+  def parent
+    trimmed = name.sub(%r{/[^/]+$}, "")
+    trimmed unless trimmed == name
+  end
+
+end
+
+# Dutree does the actual work: it runs du, parses its output, builds up
+# a tree of DuNode objects and formats that tree for output.
+class Dutree
+
+  # The node for the last line du printed (the top directory); nil until
+  # input has been called, or when du printed nothing.
+  attr_reader :topdir
+
+  def initialize
+    @nodes = {}   # path => DuNode
+  end
+
+  # Returns the node registered under name, creating it on first use.
+  def get_create_node(name)
+    @nodes[name] ||= DuNode.new(name)
+  end
+
+  # Runs du with the given arguments, records the size of every directory
+  # it reports and links each node to its parent node.
+  #
+  # arguments - Array of Strings passed to du unchanged. du is started
+  #             without a shell, so arguments containing spaces or shell
+  #             metacharacters are passed through literally.
+  def input(arguments)
+    name = ""
+    IO.popen(["du", *arguments]) { |pipe|
+      pipe.each { |line|
+        size, name = line.chomp.split(/\s+/, 2)
+        node = get_create_node(name)
+        node.size = size.to_i
+        parent = node.parent
+        get_create_node(parent).kids.push(node) if parent
+      }
+    }
+    # du prints the top directory last
+    @topdir = @nodes[name]
+  end
+
+  # Figures out how much space each directory uses that is not accounted
+  # for by its subdirectories, and adds a fake kid called "." holding
+  # that amount.
+  def get_dots(node)
+    cursize = node.size
+    for kid in node.kids
+      cursize -= kid.size
+      get_dots(kid)
+    end
+    if node.size != cursize
+      newnode = get_create_node(node.name + "/.")
+      newnode.size = cursize
+      node.kids.push(newnode)
+    end
+  end
+
+  # Recursively prints node and its kids, largest kid first.
+  #
+  # prefix - indentation (with "|" guide lines) inherited from the parents.
+  # width  - field width used for the size column on this level.
+  def output(node, prefix="", width=0)
+    line = sprintf("%#{width}d %s", node.size, node.basename)
+    puts(prefix + line)
+    # Turn this line into the prefix for the kids: keep a "|" where the
+    # size column ended and blank out everything else.
+    prefix += line.sub(/\d /, "| ")
+    prefix.gsub!(/[^|]/, " ")
+    if node.kids.length > 0 # not a bachelor node
+      kids = node.kids
+      kids.sort! { |a,b|
+        b.size_compare(a)
+      }
+      width = kids[0].size.to_s.length
+      for kid in kids
+        output(kid, prefix, width)
+      end
+    end
+  end
+
+end
+
+tree = Dutree.new
+tree.input(ARGV)
+tree.get_dots(tree.topdir)
+tree.output(tree.topdir)
+
+
+# @@PLEAC@@_6.0
+# The verbose version are match, sub, gsub, sub! and gsub!;
+# pattern needs to be a Regexp object; it yields a MatchData
+# object.
+pattern.match(string)
+string.sub(pattern, replacement)
+string.gsub(pattern, replacement)
+# As usual in Ruby, sub! does the same as sub but also modifies
+# the object, the same for gsub!/gsub.
+
+# Sugared syntax yields the position of the match (or nil if no
+# match). Note that the object at the right of the operator needs
+# not to be a Regexp object (it can be a String). The "dont
+# match" operator yields true or false.
+meadow =~ /sheep/ # position of the match, nil if no match
+meadow !~ /sheep/ # true if doesn't match, false if it does
+# There is no sugared version for the substitution
+
+meadow =~ /\bovines?\b/i and print "Here be sheep!"
+
+string = "good food"
+string.sub!(/o*/, 'e')
+
+# % echo ababacaca | ruby -ne 'puts $& if /(a|ba|b)+(a|ac)+/'
+# ababa
+
+# The "global" (or "multiple") match is handled by String#scan
+scan (/(\d+)/) {
+ puts "Found number #{$1}"
+}
+
+# String#scan yields an Array if not used with a block
+numbers = scan(/\d+/)
+
+digits = "123456789"
+nonlap = digits.scan(/(\d\d\d)/)
+yeslap = digits.scan(/(?=(\d\d\d))/)
+puts "Non-overlapping: #{nonlap.join(' ')}"
+puts "Overlapping: #{yeslap.join(' ')}";
+# Non-overlapping: 123 456 789
+# Overlapping: 123 234 345 456 567 678 789
+
+string = "And little lambs eat ivy"
+string =~ /l[^s]*s/
+puts "(#$`) (#$&) (#$')"
+# (And ) (little lambs) ( eat ivy)
+
+
+# @@PLEAC@@_6.1
+# Ruby doesn't have the same problem:
+dst = src.sub('this', 'that')
+
+progname = $0.sub('^.*/', '')
+
+bindirs = %w(/usr/bin /bin /usr/local/bin)
+libdirs = bindirs.map { |l| l.sub('bin', 'lib') }
+
+
+# @@PLEAC@@_6.3
+/\S+/ # as many non-whitespace bytes as possible
+/[A-Za-z'-]+/ # as many letters, apostrophes, and hyphens
+
+/\b([A-Za-z]+)\b/ # usually best
+/\s([A-Za-z]+)\s/ # fails at ends or w/ punctuation
+
+
+# @@PLEAC@@_6.4
+require 'socket'
+str = 'www.ruby-lang.org and www.rubygarden.org'
+re = /
+      (               # capture the hostname in $1
+        (?:           # these parens for grouping only
+          (?! [-_] )  # lookahead for neither underscore nor dash
+          [\w-] +     # hostname component
+          \.          # and the domain dot
+        ) +           # now repeat that whole thing a bunch of times
+        [A-Za-z]      # next must be a letter
+        [\w-] +       # now trailing domain part
+      )               # end of $1 capture
+     /x               # /x for nice formatting
+
+# Pass a block to compute each replacement: the hostname followed by its
+# resolved address in brackets. IPSocket.getaddress is used because the
+# old TCPsocket constant no longer exists; it raises SocketError when the
+# name cannot be resolved.
+str.gsub! re do
+  hostname = $1
+  "#{hostname} [#{IPSocket.getaddress(hostname)}]"
+end
+
+puts str
+#-----------------------------
+# to match whitespace or #-characters in an extended re you need to escape
+# them.
+
+foo = 42
+str = 'blah #foo# blah'
+str.gsub! %r/ # replace
+ \# # a pound sign
+ (\w+) # the variable name
+ \# # another pound sign
+ /x do
+ eval $1 # with the value of a local variable
+ end
+puts str # => blah 42 blah
+
+
+# @@PLEAC@@_6.5
+# The 'g' modifier doesn't exist in Ruby, a regexp can't be used
+# directly in a while loop; instead, use String#scan { |match| .. }
+fish = 'One fish two fish red fish blue fish'
+WANT = 3
+count = 0
+fish.scan(/(\w+)\s+fish\b/i) {
+ if (count += 1) == WANT
+ puts "The third fish is a #{$1} one."
+ end
+}
+
+if fish =~ /(?:\w+\s+fish\s+){2}(\w+)\s+fish/i
+ puts "The third fish is a #{$1} one."
+end
+
+pond = 'One fish two fish red fish blue fish'
+# String#scan without a block gives an array of matches, each match
+# being an array of all the specified groups
+colors = pond.scan(/(\w+)\s+fish\b/i).flatten # get all matches
+color = colors[2] # then the one we want
+# or without a temporary array
+color = pond.scan(/(\w+)\s+fish\b/i).flatten[2] # just grab element 3
+puts "The third fish in the pond is #{color}."
+
+count = 0
+fishes = 'One fish two fish red fish blue fish'
+evens = fishes.scan(/(\w+)\s+fish\b/i).select { (count+=1) % 2 == 0 }
+print "Even numbered fish are #{evens.join(' ')}."
+
+count = 0
+fishes.gsub(/
+ \b # makes next \w more efficient
+ ( \w+ ) # this is what we\'ll be changing
+ (
+ \s+ fish \b
+ )
+ /x) {
+ if (count += 1) == 4
+ 'sushi' + $2
+ else
+ $1 + $2
+ end
+}
+
+pond = 'One fish two fish red fish blue fish swim here.'
+puts "Last fish is #{pond.scan(/\b(\w+)\s+fish\b/i).flatten[-1]}"
+
+/
+ A # find some pattern A
+ (?! # mustn\'t be able to find
+ .* # something
+ A # and A
+ )
+ $ # through the end of the string
+/x
+
+# The "s" perl modifier is "m" in Ruby (not very nice since there is
+# also an "m" in perl..)
+pond = "One fish two fish red fish blue fish swim here."
+if (pond =~ /
+ \b ( \w+) \s+ fish \b
+ (?! .* \b fish \b )
+ /mix)
+ puts "Last fish is #{$1}."
+else
+ puts "Failed!"
+end
+
+
+# @@PLEAC@@_6.6
+#-----------------------------
+#!/usr/bin/ruby -w
+# killtags - very bad html killer
+$/ = nil; # each read is whole file
+while file = gets() do
+ file.gsub!(/<.*?>/m,''); # strip tags (terribly)
+ puts file # print file to STDOUT
+end
+#-----------------------------
+#!/usr/bin/ruby -w
+#headerfy - change certain chapter headers to html
+$/ = ''
+while file = gets() do
+ pattern = /
+ \A # start of record
+ ( # capture in $1
+ Chapter # text string
+ \s+ # mandatory whitespace
+ \d+ # decimal number
+ \s* # optional whitespace
+ : # a real colon
+ . * # anything not a newline till end of line
+ )
+ /x
+ puts file.gsub(pattern,'<H1>\1</H1>')
+end
+#-----------------------------
+#% ruby -00pe "gsub!(/\A(Chapter\s+\d+\s*:.*)/,'<H1>\1</H1>')" datafile
+
+#!/usr/bin/ruby -w
+#-----------------------------
+for file in ARGV
+ file = File.open(ARGV.shift)
+ while file.gets('') do # each read is a paragraph
+ print "chunk #{$.} in $ARGV has <<#{$1}>>\n" while /^START(.*?)^END/m
+ end # /m activates the multiline mode
+end
+#-----------------------------
+
+# @@PLEAC@@_6.7
+#-----------------------------
+$/ = nil;
+file = File.open("datafile")
+chunks = file.gets.split(/pattern/)
+#-----------------------------
+# .Ch, .Se and .Ss divide chunks of STDIN
+chunks = gets(nil).split(/^\.(Ch|Se|Ss)$/)
+print "I read #{chunks.size} chunks.\n"
+#-----------------------------
+
+
+# @@PLEAC@@_6.8
+while gets
+ if ~/BEGIN/ .. ~/END/
+ # line falls between BEGIN and END inclusive
+ end
+end
+
+while gets
+ if ($. == firstnum) .. ($. == lastnum)
+ # operate between firstnum and lastnum line number
+ end
+end
+
+# in ruby versions prior to 1.8, the above two conditional
+# expressions could be shortened to:
+# if /BEGIN/ .. /END/
+# and
+# if firstnum .. lastnum
+# but these now only work this way from the command line
+
+#-----------------------------
+
+while gets
+ if ~/BEGIN/ ... ~/END/
+ # line falls between BEGIN and END on different lines
+ end
+end
+
+while gets
+ if ($. == first) ... ($. == last)
+ # operate between first and last line number on different lines
+ end
+end
+
+#-----------------------------
+# command-line to print lines 15 through 17 inclusive (see below)
+# ruby -ne 'print if 15 .. 17' datafile
+
+# print out all <XMP> .. </XMP> displays from HTML doc
+while gets
+ print if ~%r#<XMP>#i .. ~%r#</XMP>#i;
+end
+
+# same, but as shell command
+# ruby -ne 'print if %r#<XMP>#i .. %r#</XMP>#i' document.html
+#-----------------------------
+# ruby -ne 'BEGIN { $top=3; $bottom=5 }; \
+# print if $top .. $bottom' /etc/passwd # FAILS
+# ruby -ne 'BEGIN { $top=3; $bottom=5 }; \
+# print if $. == $top .. $. == $bottom' /etc/passwd # works
+# ruby -ne 'print if 3 .. 5' /etc/passwd # also works
+#-----------------------------
+print if ~/begin/ .. ~/end/;
+print if ~/begin/ ... ~/end/;
+#-----------------------------
+while gets
+ $in_header = $. == 1 .. ~/^$/ ? true : false
+ $in_body = ~/^$/ .. ARGF.eof ? true : false
+end
+#-----------------------------
+seen = {}
+ARGF.each do |line|
+ next unless line =~ /^From:?\s/i .. line =~ /^$/;
+ line.scan(%r/([^<>(),;\s]+\@[^<>(),;\s]+)/).each do |addr|
+ puts addr unless seen[addr]
+ seen[addr] ||= 1
+ end
+end
+
+
+# @@PLEAC@@_6.9
+def glob2pat(globstr)
+ patmap = {
+ '*' => '.*',
+ '?' => '.',
+ '[' => '[',
+ ']' => ']',
+ }
+ globstr.gsub!(/(.)/) { |c| patmap[c] || Regexp::escape(c) }
+ '^' + globstr + '$'
+end
+
+
+# @@PLEAC@@_6.10
+# avoid interpolating patterns like this if the pattern
+# isn't going to change:
+pattern = ARGV.shift
+ARGF.each do |line|
+ print line if line =~ /#{pattern}/
+end
+
+# the above creates a new regex each iteration. Instead,
+# use the /o modifier so the regex is compiled only once
+
+pattern = ARGV.shift
+ARGF.each do |line|
+ print line if line =~ /#{pattern}/o
+end
+
+#-----------------------------
+
+#!/usr/bin/ruby
+# popgrep1 - grep for abbreviations of places that say "pop"
+# version 1: slow but obvious way
+popstates = %w(CO ON MI WI MN)
+ARGF.each do |line|
+ popstates.each do |state|
+ if line =~ /\b#{state}\b/
+ print line
+ last
+ end
+ end
+end
+
+#-----------------------------
+#!/usr/bin/ruby
+# popgrep2 - grep for abbreviations of places that say "pop"
+# version 2: eval strings; fast but hard to quote
+popstates = %w(CO ON MI WI MN)
+code = "ARGF.each do |line|\n"
+popstates.each do |state|
+ code += "\tif line =~ /\\b#{state}\\b/; print(line); next; end\n"
+end
+code += "end\n"
+print "CODE IS\n---\n#{code}\n---\n" if false # turn on for debugging
+eval code
+
+# CODE IS
+# ---
+# ARGF.each do |line|
+# if line =~ /\bCO\b/; print(line); next; end
+# if line =~ /\bON\b/; print(line); next; end
+# if line =~ /\bMI\b/; print(line); next; end
+# if line =~ /\bWI\b/; print(line); next; end
+# if line =~ /\bMN\b/; print(line); next; end
+# end
+#
+# ---
+
+## alternatively, the same idea as above but compiling
+## to a case statement: (not in perlcookbook)
+#!/usr/bin/ruby -w
+# popgrep2.5 - grep for abbreviations of places that say "pop"
+# version 2.5: eval strings; fast but hard to quote
+popstates = %w(CO ON MI WI MN)
+code = "ARGF.each do |line|\n case line\n"
+popstates.each do |state|
+ code += " when /\\b#{state}\\b/ : print line\n"
+end
+code += " end\nend\n"
+print "CODE IS\n---\n#{code}\n---\n" if false # turn on for debugging
+eval code
+
+# CODE IS
+# ---
+# ARGF.each do |line|
+# case line
+# when /\bCO\b/ : print line
+# when /\bON\b/ : print line
+# when /\bMI\b/ : print line
+# when /\bWI\b/ : print line
+# when /\bMN\b/ : print line
+# end
+# end
+#
+# ---
+
+# Note: (above) Ruby 1.8+ allows the 'when EXP : EXPR' on one line
+# with the colon separator.
+
+#-----------------------------
+#!/usr/bin/ruby
+# popgrep3 - grep for abbreviations of places that say "pop"
+# version3: build a match_any function
+popstates = %w(CO ON MI WI MN)
+expr = popstates.map{|e|"line =~ /\\b#{e}\\b/"}.join('||')
+eval "def match_any(line); #{expr};end"
+ARGF.each do |line|
+ print line if match_any(line)
+end
+#-----------------------------
+
+## building a match_all function is a trivial
+## substitution of && for ||
+## here is a generalized example:
+#!/usr/bin/ruby -w
+## grepauth - print lines that mention both foo and bar
+class MultiMatch
+ def initialize(*patterns)
+ _any = build_match('||',patterns)
+ _all = build_match('&&',patterns)
+ eval "def match_any(line);#{_any};end\n"
+ eval "def match_all(line);#{_all};end\n"
+ end
+ def build_match(sym,args)
+ args.map{|e|"line =~ /#{e}/"}.join(sym)
+ end
+end
+
+mm = MultiMatch.new('foo','bar')
+ARGF.each do |line|
+ print line if mm.match_all(line)
+end
+#-----------------------------
+
+#!/usr/bin/ruby
+# popgrep4 - grep for abbreviations of places that say "pop"
+# version4: pretty fast, but simple: compile all re's first:
+popstates = %w(CO ON MI WI MN)
+popstates = popstates.map{|re| %r/\b#{re}\b/}
+ARGF.each do |line|
+ popstates.each do |state_re|
+ if line =~ state_re
+ print line
+ break
+ end
+ end
+end
+
+## speeds trials on the jargon file(412): 26006 lines, 1.3MB
+## popgrep1 => 7.040s
+## popgrep2 => 0.656s
+## popgrep2.5 => 0.633s
+## popgrep3 => 0.675s
+## popgrep4 => 1.027s
+
+# unless speed is critical, the technique in popgrep4 is a
+# reasonable balance between speed and logical simplicity.
+
+
+# @@PLEAC@@_6.11
+begin
+ print "Pattern? "
+ pat = $stdin.gets.chomp
+ Regexp.new(pat)
+rescue
+ warn "Invalid Pattern"
+ retry
+end
+
+
+# @@PLEAC@@_6.13
+# uses the 'amatch' extension found on:
+# http://raa.ruby-lang.org/project/amatch/
+require 'amatch'
+matcher = Amatch.new('balast')
+#$relative, $distance = 0, 1
+File.open('/usr/share/dict/words').each_line do |line|
+ print line if matcher.search(line) <= 1
+end
+__END__
+#CODE
+ballast
+ballasts
+balustrade
+balustrades
+blast
+blasted
+blaster
+blasters
+blasting
+blasts
+
+
+# @@PLEAC@@_6.14
+str.scan(/\G(\d)/).each do |token|
+ puts "found #{token}"
+end
+#-----------------------------
+n = " 49 here"
+n.gsub!(/\G /,'0')
+puts n
+#-----------------------------
+str = "3,4,5,9,120"
+str.scan(/\G,?(\d+)/).each do |num|
+ puts "Found number: #{num}"
+end
+#-----------------------------
+# Ruby doesn't have the String.pos or a /c re modifier like Perl
+# But it does have StringScanner in the standard library (strscan)
+# which allows similar functionality:
+
+require 'strscan'
+text = 'the year 1752 lost 10 days on the 3rd of September'
+sc = StringScanner.new(text)
+while sc.scan(/.*?(\d+)/)
+ print "found: #{sc[1]}\n"
+end
+if sc.scan(/\S+/)
+ puts "Found #{sc[0]} after last number"
+end
+#-----------------------------
+# assuming continuing from above:
+puts "The position in 'text' is: #{sc.pos}"
+sc.pos = 30
+puts "The position in 'text' is: #{sc.pos}"
+
+
+# @@PLEAC@@_6.15
+#-----------------------------
+# greedy pattern
+str.gsub!(/<.*>/m,'') # not good
+
+# non-greedy (minimal) pattern
+str.gsub!(/<.*?>/m,'') # not great
+
+
+#-----------------------------
+#<b><i>this</i> and <i>that</i> are important</b> Oh, <b><i>me too!</i></b>
+#-----------------------------
+%r{ <b><i>(.*?)</i></b> }mx
+#-----------------------------
+%r/BEGIN((?:(?!BEGIN).)*)END/
+#-----------------------------
+%r{ <b><i>( (?: (?!</b>|</i>). )* ) </i></b> }mx
+#-----------------------------
+%r{ <b><i>( (?: (?!</[ib]>). )* ) </i></b> }mx
+#-----------------------------
+%r{
+ <b><i>
+ [^<]* # stuff not possibly bad, and not possibly the end.
+ (?:
+ # at this point, we can have '<' if not part of something bad
+ (?! </?[ib]> ) # what we can't have
+ < # okay, so match the '<'
+ [^<]* # and continue with more safe stuff
+ ) *
+ </i></b>
+ }mx
+
+
+# @@PLEAC@@_6.16
+#-----------------------------
+$/ = ""
+ARGF.each do |para|
+ para.scan %r/
+ \b # start at word boundary
+ (\S+) # find chunk of non-whitespace
+ \b # until a word boundary
+ (
+ \s+ # followed by whitespace
+ \1 # and that same chunk again
+ \b # and a word boundary
+ ) + # one or more times
+ /xi do
+ puts "dup word '#{$1}' at paragraph #{$.}"
+ end
+end
+#-----------------------------
+astr = 'nobody'
+bstr = 'bodysnatcher'
+if "#{astr} #{bstr}" =~ /^(\w+)(\w+) \2(\w+)$/
+ print "#{$2} overlaps in #{$1}-#{$2}-#{$3}"
+end
+#-----------------------------
+#!/usr/bin/ruby -w
+# prime_pattern -- find prime factors of argument using patterns
+ARGV << 180
+cap = 'o' * ARGV.shift
+while cap =~ /^(oo+?)\1+$/
+ print $1.size, " "
+ cap.gsub!(/#{$1}/,'o')
+end
+puts cap.size
+#-----------------------------
+#diophantine
+# solve for 12x + 15y + 16z = 281, maximizing x
+if ('o' * 281).match(/^(o*)\1{11}(o*)\2{14}(o*)\3{15}$/)
+ x, y, z = $1.size, $2.size, $3.size
+ puts "One solution is: x=#{x}; y=#{y}; z=#{z}"
+else
+ puts "No solution."
+end
+# => One solution is: x=17; y=3; z=2
+
+#-----------------------------
+# using different quantifiers:
+('o' * 281).match(/^(o+)\1{11}(o+)\2{14}(o+)\3{15}$/)
+# => One solution is: x=17; y=3; z=2
+
+('o' * 281).match(/^(o*?)\1{11}(o*)\2{14}(o*)\3{15}$/)
+# => One solution is: x=0; y=7; z=11
+
+('o' * 281).match(/^(o+?)\1{11}(o*)\2{14}(o*)\3{15}$/)
+# => One solution is: x=1; y=3; z=14
+
+
+# @@PLEAC@@_6.17
+# alpha OR beta
+%r/alpha|beta/
+
+# alpha AND beta
+%r/(?=.*alpha)(?=.*beta)/m
+
+# alpha AND beta, no overlap
+%r/alpha.*beta|beta.*alpha/m
+
+# NOT beta
+%r/^(?:(?!beta).)*$/m
+
+# NOT bad BUT good
+%r/(?=(?:(?!BAD).)*$)GOOD/m
+#-----------------------------
+
+if !(string =~ /pattern/) # ugly
+ something()
+end
+
+if string !~ /pattern/ # preferred
+ something()
+end
+
+
+#-----------------------------
+if string =~ /pat1/ && string =~ /pat2/
+ something()
+end
+#-----------------------------
+if string =~ /pat1/ || string =~ /pat2/
+ something()
+end
+#-----------------------------
+#!/usr/bin/ruby -w
+# minigrep - trivial grep
+pat = ARGV.shift
+ARGF.each do |line|
+ print line if line =~ /#{pat}/o
+end
+#-----------------------------
+ "labelled" =~ /^(?=.*bell)(?=.*lab)/m
+#-----------------------------
+$string =~ /bell/ && $string =~ /lab/
+#-----------------------------
+$murray_hill = "blah bell blah "
+if $murray_hill =~ %r{
+ ^ # start of string
+ (?= # zero-width lookahead
+ .* # any amount of intervening stuff
+ bell # the desired bell string
+ ) # rewind, since we were only looking
+ (?= # and do the same thing
+ .* # any amount of intervening stuff
+ lab # and the lab part
+ )
+ }mx # /m means . can match newline
+
+ print "Looks like Bell Labs might be in Murray Hill!\n";
+end
+#-----------------------------
+"labelled" =~ /(?:^.*bell.*lab)|(?:^.*lab.*bell)/
+#-----------------------------
+$brand = "labelled";
+if $brand =~ %r{
+ (?: # non-capturing grouper
+ ^ .*? # any amount of stuff at the front
+ bell # look for a bell
+ .*? # followed by any amount of anything
+ lab # look for a lab
+ ) # end grouper
+ | # otherwise, try the other direction
+ (?: # non-capturing grouper
+ ^ .*? # any amount of stuff at the front
+ lab # look for a lab
+ .*? # followed by any amount of anything
+ bell # followed by a bell
+ ) # end grouper
+ }mx # /m means . can match newline
+ print "Our brand has bell and lab separate.\n";
+end
+#-----------------------------
+$map =~ /^(?:(?!waldo).)*$/m # Perl's /s (dot matches newline) is /m in Ruby
+#-----------------------------
+$map = "the great baldo"
+if $map =~ %r{
+ ^ # start of string
+ (?: # non-capturing grouper
+ (?! # look ahead negation
+ waldo # is he ahead of us now?
+ ) # is so, the negation failed
+ . # any character (cuzza /s)
+ ) * # repeat that grouping 0 or more
+ $ # through the end of the string
+ }mx # /m means . can match newline
+ print "There's no waldo here!\n";
+end
+=begin
+ 7:15am up 206 days, 13:30, 4 users, load average: 1.04, 1.07, 1.04
+
+USER TTY FROM LOGIN@ IDLE JCPU PCPU WHAT
+
+tchrist tty1 5:16pm 36days 24:43 0.03s xinit
+
+tchrist tty2 5:19pm 6days 0.43s 0.43s -tcsh
+
+tchrist ttyp0 chthon 7:58am 3days 23.44s 0.44s -tcsh
+
+gnat ttyS4 coprolith 2:01pm 13:36m 0.30s 0.30s -tcsh
+=end
+#% w | minigrep '^(?!.*ttyp).*tchrist'
+#-----------------------------
+%r{
+ ^ # anchored to the start
+ (?! # zero-width look-ahead assertion
+ .* # any amount of anything (faster than .*?)
+ ttyp # the string you don't want to find
+ ) # end look-ahead negation; rewind to start
+ .* # any amount of anything (faster than .*?)
+ tchrist # now try to find Tom
+}x
+#-----------------------------
+#% w | grep tchrist | grep -v ttyp
+#-----------------------------
+#% grep -i 'pattern' files
+#% minigrep '(?i)pattern' files
+#-----------------------------
+
+
+# @@PLEAC@@_6.20
+ans = $stdin.gets.chomp
+re = %r/^#{Regexp.quote(ans)}/
+case
+ when "SEND" =~ re : puts "Action is send"
+ when "STOP" =~ re : puts "Action is stop"
+ when "ABORT" =~ re : puts "Action is abort"
+ when "EDIT" =~ re : puts "Action is edit"
+end
+#-----------------------------
+require 'abbrev'
+table = Abbrev.abbrev %w-send stop abort edit-
+loop do
+ print "Action: "
+ ans = $stdin.gets.chomp
+ puts "Action for #{ans} is #{table[ans.downcase]}"
+end
+
+
+#-----------------------------
+# dummy values are defined for 'file', 'PAGER', and
+# the 'invoke_editor' and 'deliver_message' methods
+# do not do anything interesting in this example.
+#!/usr/bin/ruby -w
+require 'abbrev'
+
+file = 'pleac_ruby.data'
+PAGER = 'less'
+
+def invoke_editor
+ puts "invoking editor"
+end
+
+def deliver_message
+ puts "delivering message"
+end
+
+actions = {
+ 'edit' => self.method(:invoke_editor),
+ 'send' => self.method(:deliver_message),
+ 'list' => proc {system(PAGER, file)},
+ 'abort' => proc {puts "See ya!"; exit},
+ "" => proc {puts "Unknown Command"}
+}
+
+dtable = Abbrev.abbrev(actions.keys)
+loop do
+ print "Action: "
+ ans = $stdin.gets.chomp.delete(" \t")
+ actions[ dtable[ans.downcase] || "" ].call
+end
+
+
+# @@PLEAC@@_6.19
+#-----------------------------
+# basically, the Perl Cookbook categorizes this as an
+# unsolvable problem ...
+#-----------------------------
+1 while addr.gsub!(/\([^()]*\)/,'')
+#-----------------------------
+# Dear someuser@host.com,
+#
+# Please confirm the mail address you gave us Wed May 6 09:38:41
+# MDT 1998 by replying to this message. Include the string
+# "Rumpelstiltskin" in that reply, but spelled in reverse; that is,
+# start with "Nik...". Once this is done, your confirmed address will
+# be entered into our records.
+
+
+# @@PLEAC@@_6.21
+#-----------------------------
+#% gunzip -c ~/mail/archive.gz | urlify > archive.urlified
+#-----------------------------
+#% urlify ~/mail/*.inbox > ~/allmail.urlified
+#-----------------------------
+#!/usr/bin/ruby -w
+# urlify - wrap HTML links around URL-like constructs
+
+urls = '(https?|telnet|gopher|file|wais|ftp)';
+ltrs = '\w';
+gunk = '/#~:.?+=&%@!\-';
+punc = '.:?\-';
+any = "#{ltrs}#{gunk}#{punc}";
+
+ARGF.each do |line|
+ line.gsub! %r/
+ \b # start at word boundary
+ ( # begin $1 {
+ #{urls} : # need resource and a colon
+ [#{any}] +? # followed by on or more
+ # of any valid character, but
+ # be conservative and take only
+ # what you need to....
+ ) # end $1 }
+ (?= # look-ahead non-consumptive assertion
+ [#{punc}]* # either 0 or more punctuation
+ [^#{any}] # followed by a non-url char
+ | # or else
+ $ # then end of the string
+ )
+ /iox do
+ %Q|<A HREF="#{$1}">#{$1}</A>|
+ end
+ print line
+end
+
+
+# @@PLEAC@@_6.23
+%r/^m*(d?c{0,3}|c[dm])(l?x{0,3}|x[lc])(v?i{0,3}|i[vx])$/i
+#-----------------------------
+str.sub!(/(\S+)(\s+)(\S+)/, '\3\2\1')
+#-----------------------------
+%r/(\w+)\s*=\s*(.*)\s*$/ # keyword is $1, value is $2
+#-----------------------------
+%r/.{80,}/
+#-----------------------------
+%r|(\d+)/(\d+)/(\d+) (\d+):(\d+):(\d+)|
+#-----------------------------
+str.gsub!(%r|/usr/bin|,'/usr/local/bin')
+#-----------------------------
+str.gsub!(/%([0-9A-Fa-f][0-9A-Fa-f])/){ $1.hex.chr }
+#-----------------------------
+str.gsub!(%r{
+ /\* # Match the opening delimiter
+ .*? # Match a minimal number of characters
+ \*/ # Match the closing delimiter
+}xm,'')
+#-----------------------------
+str.sub!(/^\s+/, '')
+str.sub!(/\s+$/, '')
+
+# but really, in Ruby we'd just do:
+str.strip!
+#-----------------------------
+str.gsub!(/\\n/,"\n")
+#-----------------------------
+str.sub!(/^.*::/, '')
+#-----------------------------
+%r/^([01]?\d\d|2[0-4]\d|25[0-5])\.([01]?\d\d|2[0-4]\d|25[0-5])\.
+ ([01]?\d\d|2[0-4]\d|25[0-5])\.([01]?\d\d|2[0-4]\d|25[0-5])$/x
+#-----------------------------
+str.sub!(%r|^.*/|, '')
+#-----------------------------
+cols = ( (ENV['TERMCAP'] || " ") =~ /:co#(\d+):/ ) ? $1.to_i : 80; # Integer in both branches
+#-----------------------------
+name = " #{$0} #{ARGV}".gsub(%r| /\S+/|, ' ')
+#-----------------------------
+require 'rbconfig'
+include Config
+raise "This isn't Linux" unless CONFIG['target_os'] =~ /linux/i;
+#-----------------------------
+str.gsub!(%r/\n\s+/, ' ')
+#-----------------------------
+nums = str.scan(/(\d+\.?\d*|\.\d+)/)
+#-----------------------------
+capwords = str.scan(%r/(\b[^\Wa-z0-9_]+\b)/)
+#-----------------------------
+lowords = str.scan(%r/(\b[^\WA-Z0-9_]+\b)/)
+#-----------------------------
+icwords = str.scan(%r/(\b[^\Wa-z0-9_][^\WA-Z0-9_]*\b)/)
+#-----------------------------
+links = str.scan(%r/<A[^>]+?HREF\s*=\s*["']?([^'" >]+?)[ '"]?>/mi)
+#-----------------------------
+initial = str =~ /^\S+\s+(\S)\S*\s+\S/ ? $1 : ""
+#-----------------------------
+str.gsub!(%r/"([^"]*)"/, %q-``\1''-)
+#-----------------------------
+
+$/ = ""
+sentences = []
+ARGF.each do |para|
+ para.gsub!(/\n/, ' ')
+ para.gsub!(/ {3,}/,' ')
+ sentences << para.scan(/(\S.*?[!?.])(?= |\Z)/)
+end
+
+#-----------------------------
+%r/(\d{4})-(\d\d)-(\d\d)/ # YYYY in $1, MM in $2, DD in $3
+#-----------------------------
+%r/ ^
+ (?:
+ 1 \s (?: \d\d\d \s)? # 1, or 1 and area code
+ | # ... or ...
+ \(\d\d\d\) \s # area code with parens
+ | # ... or ...
+ (?: \+\d\d?\d? \s)? # optional +country code
+ \d\d\d ([\s\-]) # and area code
+ )
+ \d\d\d (\s|\1) # prefix (and area code separator)
+ \d\d\d\d # exchange
+ $
+ /x
+#-----------------------------
+%r/\boh\s+my\s+gh?o(d(dess(es)?|s?)|odness|sh)\b/i
+#-----------------------------
+lines = []
+lines << $1 while input.sub!(/^([^\012\015]*)(\012\015?|\015\012?)/,'')
+
+
+# @@PLEAC@@_7.0
+# An IO object being Enumerable, we can use 'each' directly on it
+File.open("/usr/local/widgets/data").each { |line|
+ puts line if line =~ /blue/
+}
+
+logfile = File.new("/var/log/rubylog.txt", "w")
+mysub($stdin, logfile)
+
+# The method IO#readline is similar to IO#gets
+# but throws an exception when it reaches EOF
+f = File.new("bla.txt")
+begin
+ while (line = f.readline)
+ line.chomp
+ $stdout.print line if line =~ /blue/
+ end
+rescue EOFError
+ f.close
+end
+
+while $stdin.gets # reads from STDIN
+ unless (/\d/)
+ $stderr.puts "No digit found." # writes to STDERR
+ end
+ puts "Read: #{$_}" # writes to STDOUT
+end
+
+logfile = File.new("/tmp/log", "w")
+
+logfile.close
+
+# $defout (or its synonym '$>') is the destination of output
+# for Kernel#print, Kernel#puts, and family functions
+logfile = File.new("log.txt", "w")
+old = $defout
+$defout = logfile # switch to logfile for output
+puts "Countdown initiated ..."
+$defout = old # return to original output
+puts "You have 30 seconds to reach minimum safety distance."
+
+
+# @@PLEAC@@_7.1
+source = File.new(path, "r") # open file "path" for reading only
+sink = File.new(path, "w") # open file "path" for writing only
+
+source = File.open(path, File::RDONLY) # open file "path" for reading only
+sink = File.open(path, File::WRONLY) # open file "path" for writing only
+
+file = File.open(path, "r+") # open "path" for reading and writing
+file = File.open(path, flags) # open "path" with the flags "flags" (see examples below for flags)
+
+# open file "path" read only
+file = File.open(path, "r")
+file = File.open(path, File::RDONLY)
+
+# open file "path" write only, create it if it does not exist
+# truncate it to zero length if it exists
+file = File.open(path, "w")
+file = File.open(path, File::WRONLY|File::TRUNC|File::CREAT)
+file = File.open(path, File::WRONLY|File::TRUNC|File::CREAT, 0666) # with permission 0666
+
+# open file "path" write only, fails if file exists
+file = File.open(path, File::WRONLY|File::EXCL|File::CREAT)
+file = File.open(path, File::WRONLY|File::EXCL|File::CREAT, 0666)
+
+# open file "path" for appending
+file = File.open(path, "a")
+file = File.open(path, File::WRONLY|File::APPEND|File::CREAT)
+file = File.open(path, File::WRONLY|File::APPEND|File::CREAT, 0666)
+
+# open file "path" for appending only when file exists
+file = File.open(path, File::WRONLY|File::APPEND)
+
+# open file "path" for reading and writing
+file = File.open(path, "r+")
+file = File.open(path, File::RDWR)
+
+# open file for reading and writing, create a new file if it does not exist
+file = File.open(path, File::RDWR|File::CREAT)
+file = File.open(path, File::RDWR|File::CREAT, 0600)
+
+# open file "path" reading and writing, fails if file exists
+file = File.open(path, File::RDWR|File::EXCL|File::CREAT)
+file = File.open(path, File::RDWR|File::EXCL|File::CREAT, 0600)
+
+
+# @@PLEAC@@_7.2
+# No problem with Ruby since the filename doesn't contain characters with
+# special meaning; like Perl's sysopen
+File.open(filename, 'r')
+
+
+# @@PLEAC@@_7.3
+File.expand_path('~root/tmp')
+#=> "/root/tmp"
+File.expand_path('~rpcuser')
+#=> "/var/lib/nfs"
+
+# To expand ~/.. it explicitly needs the environment variable HOME
+File.expand_path('~/tmp')
+#=> "/home/gc/tmp"
+
+
+# @@PLEAC@@_7.4
+# The exception raised in Ruby reports the filename
+File.open('afile')
+
+
+# @@PLEAC@@_7.5
+# Standard Ruby distribution provides the following useful extension
+require 'tempfile'
+# With the Tempfile class, the file is automatically deleted on garbage
+# collection, so you won't need to remove it, later on.
+tf = Tempfile.new('tmp') # a name is required to create the filename
+
+# If you need to pass the filename to an external program you can use
+# File#path, but don't forget to File#flush in order to flush anything
+# living in some buffer somewhere.
+tf.flush
+system("/usr/bin/dowhatever #{tf.path}")
+
+fh = Tempfile.new('tmp')
+fh.sync = true # autoflushes
+10.times { |i| fh.puts i }
+fh.rewind
+puts 'Tmp file has: ', fh.readlines
+
+
+# @@PLEAC@@_7.6
+while (DATA.gets) do
+ # process the line
+end
+__END__
+# your data goes here
+# __DATA__ doesn't exist in Ruby
+
+#CODE
+# get info about the script (size, date of last modification)
+kilosize = DATA.stat.size / 1024
+last_modif = DATA.stat.mtime
+puts "<P>Script size is #{kilosize}"
+puts "<P>Last script update: #{last_modif}"
+__END__
+# DO NOT REMOVE THE PRECEEDING LINE.
+# Everything else in this file will be ignored.
+#CODE
+
+
+# @@PLEAC@@_7.7
+while line = gets do
+ # do something with line.
+end
+
+# or
+while gets do
+ # do something with $_
+end
+
+# or more rubyish
+$stdun.each do |line|
+ # do stuff with line
+end
+
+
+# ARGF may make this easier
+# this is skipped if ARGV.size==0
+ARGV.each do |filename|
+ # closing and exception handling are done by the block
+ open(filename) do |fd|
+ fd.each do |line|
+ # do stuff with line
+ end
+ end rescue abort("can't open %s" % filename)
+end
+
+# globbing is done in the Dir module
+ARGV.replace(Dir["*.[Cch]"]) if ARGV.empty? # mutate in place: ARGV is a constant, and ARGF reads this same array
+
+# note: optparse is the preferred way to handle this
+if (ARGV[0] == '-c')
+ chop_first += 1
+ ARGV.shift
+end
+
+
+# processing numerical options
+if ARGV[0] =~ /^-(\d+)$/
+ columns = $1
+ ARGV.shift
+end
+
+# again, better to use optparse:
+require 'optparse'
+nostdout = 0
+append = 0
+unbuffer = 0
+ignore_ints = 0
+ARGV.options do |opt|
+ opt.on('-n') { nostdout +=1 }
+ opt.on('-a') { append +=1 }
+ opt.on('-u') { unbuffer +=1 }
+ opt.on('-i') { ignore_ints +=1 }
+ opt.parse!
+end or abort("usage: " + __FILE__ + " [-ainu] [filenames]")
+
+# no need to do undef $/, we have File.read
+str = File.read(ARGV[0])
+
+# again we have File.read
+str = File.read(ARGV[0])
+
+# not sure what this should do:
+# I believe open the file, print filename, lineno and line:
+ARGF.each_with_index do |line, idx|
+ print ARGF.filename, ":", idx, ";", line
+end
+
+# print all the lines in every file passed via command line that contains login
+ARGF.each do |line|
+ puts line if line =~ /login/
+end
+#
+# even this would fit
+#%ruby -ne "print if /f/" 2.log
+#
+
+ARGF.each { |l| puts l.downcase } # downcase! returns nil when the line is already lower case
+
+#------------------
+#!/usr/bin/ruby -p
+# just like perl's -p
+$_.downcase!
+#
+
+# I don't know who should I trust.
+# perl's version splits on \w+ while python's on \w.
+
+chunks = 0
+
+File.read(ARGV[0]).split.each do |word|
+ next if word =~ /^#/
+ break if ["__DATA__", "__END__"].member? word
+ chunks += 1
+end
+
+print "Found ", chunks, " chunks\n"
+
+
+# @@PLEAC@@_7.8
+old = File.open(old_file)
+new = File.open(new_file, "w")
+while old.gets do
+ # change $_, then...
+ new.print $_
+end
+old.close
+new.close
+File.rename(old_file, "old.orig")
+File.rename(new_file, old_file)
+
+while old.gets do
+ if $. == 20 then # we are at the 20th line
+ new.puts "Extra line 1"
+ new.puts "Extra line 2"
+ end
+ new.print $_
+end
+
+while old.gets do
+ next if 20..30 # skip the 20th line to the 30th
+ # Ruby (and Perl) permit to write if 20..30
+ # instead of if (20 <= $.) and ($. <= 30)
+ new.print $_
+end
+
+
+# @@PLEAC@@_7.9
+#% ruby -i.orig -pe 'FILTER COMMAND' file1 file2 file3 ...
+#
+#-----------------------------
+##!/usr/bin/ruby -i.orig -p
+# filter commands go here
+#-----------------------------
+
+#% ruby -pi.orig -e 'gsub!(/DATE/){Time.now}'
+
+# effectively becomes:
+ARGV << 'I'
+oldfile = ""
+while gets
+ if ARGF.filename != oldfile
+ newfile = ARGF.filename
+ File.rename(newfile, newfile + ".orig")
+ $stdout = File.open(newfile,'w')
+ oldfile = newfile
+ end
+ gsub!(/DATE/){Time.now}
+ print
+end
+$stdout = STDOUT
+#-----------------------------
+#% ruby -i.old -pe 'gsub!(%r{\bhisvar\b}, 'hervar')' *.[Cchy]
+
+#-----------------------------
+# set up to iterate over the *.c files in the current directory,
+# editing in place and saving the old file with a .orig extension
+$-i = '.orig' # set up -i mode
+ARGV.replace(Dir['*.[Cchy]'])
+while gets
+ if $. == 1
+ print "This line should appear at the top of each file\n"
+ end
+ gsub!(/\b(p)earl\b/i, '\1erl') # Correct typos, preserving case
+ print
+ ARGF.close if ARGF.eof
+end
+
+
+# @@PLEAC@@_7.10
+File.open('itest', 'r+') do |f| # open file for update
+ lines = f.readlines # read into array of lines
+ lines.each do |it| # modify lines
+ it.gsub!(/foo/, 'QQQ')
+ end
+ f.pos = 0 # back to start
+ f.print lines # write out modified lines
+ f.truncate(f.pos) # truncate to new length
+end # file is automatically closed
+#-----------------------------
+File.open('itest', 'r+') do |f|
+ out = ""
+ f.each do |line|
+ out << line.gsub(/DATE/) {Time.now}
+ end
+ f.pos = 0
+ f.print out
+ f.truncate(f.pos)
+end
+
+# @@PLEAC@@_7.11
+File.open('infile', 'r+') do |f|
+ f.flock File::LOCK_EX
+ # update file
+end
+#-----------------------------
+File::LOCK_SH # shared lock (for reading)
+File::LOCK_EX # exclusive lock (for writing)
+File::LOCK_NB # non-blocking request
+File::LOCK_UN # free lock
+#-----------------------------
+unless f.flock File::LOCK_EX | File::LOCK_NB
+ warn "can't get immediate lock: blocking ..."
+ f.flock File::LOCK_EX
+end
+#-----------------------------
+File.open('numfile', File::RDWR|File::CREAT) do |f|
+ f.flock(File::LOCK_EX)
+ num = f.gets.to_i || 0
+ f.pos = 0
+ f.truncate 0
+ f.puts num + 1q
+end
+
+
+# @@PLEAC@@_7.12
+output_handle.sync = true
+# Please note that like in Perl, $stderr is already unbuffered
+#-----------------------------
+#!/usr/bin/ruby -w
+# seeme - demo stdio output buffering
+$stdout.sync = ARGV.size > 0
+print "Now you don't see it..."
+sleep 2
+puts "now you do"
+#-----------------------------
+$stderr.sync = true
+afile.sync = false
+#-----------------------------
+# assume 'remote_con' is an interactive socket handle,
+# but 'disk_file' is a handle to a regular file.
+remote_con.sync = true # unbuffer for clarity
+disk_file.sync = false # buffered for speed
+#-----------------------------
+require 'socket'
+sock = TCPSocket.new('www.ruby-lang.org', 80)
+sock.sync = true
+sock.puts "GET /en/ HTTP/1.0 \n\n"
+resp = sock.read
+print "DOC IS: #{resp}\n"
+
+
+# @@PLEAC@@_7.13
+#-----------------------------
+# assumes fh1, fh2, fh3 are open IO objects
+nfound = select([$stdin, fh1, fh2, fh3], nil, nil, 0)
+nfound[0].each do |file|
+ case file
+ when fh1
+ # do something with fh1
+ when fh2
+ # do something with fh2
+ when fh3
+ # do something with fh3
+ end
+end
+#-----------------------------
+input_files = []
+# repeat next line for all in-files to poll
+input_files << fh1
+if nfound = select(input_files, nil, nil, 0)
+ # input ready on files in nfound[0]
+end
+
+
+# @@PLEAC@@_8.0
+#-----------------------------
+# datafile is a file or IO object
+datafile.readlines.each { |line|
+ line.chomp!
+ size = line.length
+ puts size
+}
+#-----------------------------
+datafile.readlines.each { |line|
+ puts line.chomp!.length
+}
+#-----------------------------
+lines = datafile.readlines
+#-----------------------------
+whole_file = file.read
+#-----------------------------
+# ruby -040 -e 'word = gets; puts "First word is #{word}"'
+#-----------------------------
+# ruby -ne 'BEGIN { $/="%%\n" }; $_.chomp; puts $_ if( $_=~/Unix/i)' fortune.dat
+#-----------------------------
+handle.print "one", "two", "three" # "onetwothree"
+puts "Baa baa black sheep." # sent to $stdout
+#-----------------------------
+buffer = handle.read(4096)
+rv = buffer.length
+#-----------------------------
+handle.truncate(length)
+open("/tmp/#{$$}.pid", 'w') { |handle| handle.truncate(length) } # pid file lives inside /tmp
+#-----------------------------
+pos = datafile.pos # tell is an alias of pos
+puts "I'm #{pos} bytes from the start of datafile"
+#-----------------------------
+logfile.seek(0, IO::SEEK_END)
+datafile.seek(pos) # IO::SEEK_SET is the default
+out.seek(-20, IO::SEEK_CUR)
+#-----------------------------
+written = datafile.syswrite(mystring) # returns the number of bytes actually written
+raise RuntimeError unless written == mystring.length # a short write is treated as an error
+block = infile.sysread(256) # no equivalent to perl offset parameter in sysread
+puts "only read #{block.length} bytes" if 256 != block.length
+#-----------------------------
+pos = handle.sysseek(0, IO::SEEK_CUR) # don't change position
+
+
+# @@PLEAC@@_8.1
+while (line = fh.gets)
+ line.chomp!
+ nextline = nil
+ line.gsub!(/\\$/) { |match| nextline = fh.gets; '' }
+ if (nextline != nil)
+ line += nextline
+ redo
+ end
+ # process full record in line here
+end
+#-----------------------------
+# DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) \
+# $(TEXINFOS) $(INFOS) $(MANS) $(DATA)
+# DEP_DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) \
+# $(TEXINFOS) $(INFO_DEPS) $(MANS) $(DATA) \
+# $(EXTRA_DIST)
+#-----------------------------
+line.gsub!(/\\\s*$/, '') {
+ # as before
+}
+
+
+# @@PLEAC@@_8.2
+#-----------------------------
+count = `wc -l < #{filename}`
+fail "wc failed: #{$?}" if $? != 0
+count.chomp!
+#-----------------------------
+count = 0
+File.open(file, 'r') { |fh|
+ count += 1 while fh.gets
+}
+# count now holds the number of lines read
+#-----------------------------
+count = 0
+while (chunk = file.sysread(2**16))
+ count += chunk.count("\n")
+end rescue EOFError
+#-----------------------------
+File.open(filename,'r') { |fh|
+ count += 1 while fh.gets
+}
+# count now holds the number of lines read
+#-----------------------------
+# As ruby doesn't quite have an equivalent to using a for
+# statement as in perl, I threw this in
+count = File.readlines(filename).size
+#-----------------------------
+1 while file.gets
+count = $.
+#-----------------------------
+$/ = ''
+open(filename, 'r') { |fh|
+ 1 while fh.gets
+ para_count = $.
+} rescue fail("can't open #{filename}: $!")
+#-----------------------------
+
+
+# @@PLEAC@@_8.3
+#-----------------------------
+while (gets)
+ split.each { |chunk|
+ # do something with chunk
+ }
+end
+#-----------------------------
+while (gets)
+ gsub(/(\w[\w'-]*)/) { |word|
+ # do something with word
+ }
+end
+#-----------------------------
+# Make a word frequency count
+# normally hashes can be created using {} or just Hash.new
+# but we want the default value of an entry to be 0 instead
+# of nil. (nil can't be incremented)
+seen = Hash.new(0)
+while (gets)
+ gsub(/(\w[\w'-]*)/) { |word|
+ seen[word.downcase] += 1
+ }
+end
+# output hash in a descending numeric sort of its values
+seen.sort { |a,b| b[1] <=> a[1] }.each do |k,v|
+ printf("%5d %s\n", v, k )
+end
+
+#-----------------------------
+# Line frequency count
+seen = Hash.new(0)
+while (gets)
+ seen[$_.downcase] += 1
+end
+seen.sort { |a,b| b[1] <=> a[1] }.each do |k,v|
+ printf("%5d %s\n", v, k )
+end
+#-----------------------------
+
+
+# @@PLEAC@@_8.4
+#-----------------------------
+# instead of file handle FILE, we can just
+# use a string containing the filename
+File.readlines(file).each { |line|
+ # do something with line
+}
+#-----------------------------
+File.readlines(file).reverse_each { |line|
+ # do something with line
+}
+#-----------------------------
+# the variable lines might have been created
+# this way
+# lines = File.readlines(file)
+#
+# normally one would use the reverse_each, but
+# if you insist on using a numerical index to
+# iterate over the lines array...
+(lines.size - 1).downto(0) { |i|
+ line = lines[i]
+}
+#-----------------------------
+# the second readlines argument is the
+# record separator $/, just like perl, a blank
+# separator splits the records into paragraphs
+File.readlines(file, '').each { |paragraph|
+ # do something with paragraph
+ puts "->Paragraph #{paragraph}"
+}
+#-----------------------------
+
+
+# @@PLEAC@@_8.6
+
+$/ = "%\n";
+srand;
+
+File.open('/usr/share/fortune/humorists').each do |line|
+ adage = line if rand($.) < 1
+end
+
+puts adage;
+
+
+# @@PLEAC@@_8.10
+begin
+ fh = File.open(file, "r+")
+ addr = fh.tell unless fh.eof while fh.gets
+ fh.truncate(addr)
+rescue SystemCallError
+ $stderr.puts "#$!"
+end
+
+
+# @@PLEAC@@_9.0
+entry = File.stat("/usr/bin/vi")
+entry = File.stat("/usr/bin")
+entry = File.stat(INFILE)
+
+entry = File.stat("/usr/bin/vi")
+ctime = entry.ctime
+size = entry.size
+
+f = File.open(filename, "r")
+
+## There is no -T equivalent in Ruby, but we can still test emptiness
+if test(?s, filename)
+ puts "#{filename} doesn't have text in it."
+ exit
+end
+
+Dir.new("/usr/bin").each do |filename|
+ puts "Inside /usr/bin is something called #{filename}"
+end
+
+
+# @@PLEAC@@_9.1
+file = File.stat("filename")
+readtime, writetime = file.atime, file.mtime
+file.utime(readtime, writetime)
+
+SECONDS_PER_DAY = 60 * 60 * 24
+file = File.stat("filename")
+atime, mtime = file.atime, file.mtime
+
+atime -= 7 * SECONDS_PER_DAY
+mtime -= 7 * SECONDS_PER_DAY
+
+File.utime(atime, mtime, file)
+mtime = File.stat(file).mtime
+File.utime(Time.new, mtime, file)
+File.utime(Time.new, File.stat("testfile").mtime, file)
+
+#-----------------------------
+#!/usr/bin/ruby -w
+## uvi - vi a file without changing its access times
+
+if ARGV.length != 1
+ puts "usage: uvi filename"
+ exit
+end
+file = ARGV[0]
+atime, mtime = File.stat(file).atime, File.stat(file).mtime
+system(ENV["EDITOR"] || "vi", file)
+File.utime(atime, mtime, file)
+#-----------------------------
+
+
+# @@PLEAC@@_9.2
+File.unlink(FILENAME)
+
+err_flg = false
+filenames.each do |file|
+ begin
+ File.unlink(file)
+ rescue
+ err_flg = $!
+ end
+end
+err_flg and raise "Couldn't unlink all of #{filenames.join(" ")}: #{err_flg}"
+
+File.unlink(file)
+
+count = filenames.length
+filenames.each do |file|
+ begin
+ File.unlink(file)
+ rescue
+ count -= 1
+ end
+end
+if count != filenames.length
+ STDERR.puts "could only delete #{count} of #{filenames.length} files"
+end
+
+
+# @@PLEAC@@_9.3
+require "ftools"
+File.copy(oldfile, newfile)
+
+infile = File.open(oldfile, "r")
+outfile = File.open(newfile, "w")
+
+blksize = infile.stat.blksize
+# This doesn't handle partial writes or ^Z
+# like the Perl version does.
+while (line = infile.read(blksize))
+ outfile.write(line)
+end
+
+infile.close
+outfile.close
+
+system("cp #{oldfile} #{newfile}") # unix
+system("copy #{oldfile} #{newfile}") # dos, vms
+
+require "ftools"
+File.copy("datafile.dat", "datafile.bak")
+File.move("datafile.new", "datafile.dat")
+
+
+# @@PLEAC@@_9.4
+$seen = {} # must use global var to be seen inside of method below
+
+def do_my_thing(filename)
+ dev, ino = File.stat(filename).dev, File.stat(filename).ino
+ unless $seen[[dev, ino]]
+ # do something with $filename because we haven't
+ # seen it before
+ end
+ $seen[[dev, ino]] = $seen[[dev, ino]].to_i + 1
+end
+
+files.each do |filename|
+ dev, ino = File.stat(filename).dev, File.stat(filename).ino
+ if !$seen.has_key?([dev, ino])
+ $seen[[dev, ino]] = []
+ end
+ $seen[[dev, ino]].push(filename)
+end
+
+$seen.keys.sort.each do |devino|
+ ino, dev = devino
+ if $seen[devino].length > 1
+ # $seen[devino] is a list of filenames for the same file
+ end
+end
+
+
+# @@PLEAC@@_9.5
+Dir.open(dirname) do |dir|
+ dir.each do |file|
+ # do something with dirname/file
+ puts file
+ end
+end
+# Dir.close is automatic
+
+# No -T equivalent in Ruby
+
+dir.each do |file|
+ next if file =~ /^\.\.?$/
+ # ...
+end
+
+def plainfiles(dir)
+ dh = Dir.open(dir)
+ dh.entries.grep(/^[^.]/).
+ map {|file| "#{dir}/#{file}"}.
+ find_all {|file| test(?f, file)}.
+ sort
+end
+
+
+# @@PLEAC@@_9.6
+list = Dir.glob("*.c")
+
+dir = Dir.open(path)
+files = dir.entries.grep(/\.c$/)
+dir.close
+
+files = Dir.glob("*.c")
+files = Dir.open(path).entries.grep(/\.[ch]$/i)
+
+dir = Dir.new(path)
+files = dir.entries.grep(/\.[ch]$/i)
+
+begin
+ d = Dir.open(dir)
+rescue Errno::ENOENT
+ raise "Couldn't open #{dir} for reading: #{$!}"
+end
+
+files = []
+d.each do |file|
+ puts file
+ next unless file =~ /\.[ch]$/i
+
+ filename = "#{dir}/#{file}"
+ # There is no -T equivalent in Ruby, but we can still test emptiness
+ files.push(filename) if test(?s, filename)
+end
+
+dirs.entries.grep(/^\d+$/).
+ map { |file| [file, "#{path}/#{file}"]} .
+ select { |file| test(?d, file[1]) }.
+ sort { |a,b| a[0] <=> b[0] }.
+ map { |file| file[1] }
+
+
+# @@PLEAC@@_9.7
+require 'find'
+Find.find(dirlist) do |file|
+ # do whatever
+end
+
+require 'find'
+argv = ARGV.empty? ? %w{.} : ARGV
+Find.find(*argv) do |file|
+ print file, (test(?d, file) ? "/\n" : "\n")
+end
+
+require 'find'
+argv = ARGV.empty? ? %w{.} : ARGV
+sum = 0
+Find.find(*argv) do |file|
+ size = test(?s, file) || 0
+ sum += size
+end
+puts "#{argv.join(' ')} contains #{sum} bytes"
+
+require 'find'
+argv = ARGV.empty? ? %w{.} : ARGV
+saved_size, saved_name = -1, ""
+Find.find(*argv) do |file|
+ size = test(?s, file) || 0
+ next unless test(?f, file) && size > saved_size
+ saved_size = size
+ saved_name = file
+end
+puts "Biggest file #{saved_name} in #{argv.join(' ')} is #{saved_size}"
+
+require 'find'
+argv = ARGV.empty? ? %w{.} : ARGV
+age, name = nil
+Find.find(*argv) do |file|
+ mtime = File.stat(file).mtime
+ next if age && age > mtime
+ age = mtime
+ name = file
+end
+puts "#{name} #{age}"
+
+#-----------------------------
+#!/usr/bin/ruby -w
+# fdirs - find all directories
+require 'find'
+argv = ARGV.empty? ? %w{.} : ARGV
+# find is provided by the Find module; File has no such method.
+Find.find(*argv) { |file| puts file if test(?d, file) }
+#-----------------------------
+
+
+# @@PLEAC@@_9.8
+require 'fileutils'
+
+puts "Usage #{$0} dir ..." if ARGV.empty?
+ARGV.each do |dir|
+ FileUtils.rmtree(dir)
+end
+
+
+# @@PLEAC@@_9.9
+require 'ftools'
+names.each do |file|
+ newname = file
+ begin
+ File.move(file, newname)
+ rescue Errno::EPERM
+ $stderr.puts "Couldn't rename #{file} to #{newname}: #{$!}"
+ end
+end
+
+require 'ftools'
+op = ARGV.empty? ? (raise "Usage: rename expr [files]\n") : ARGV.shift
+argv = ARGV.empty? ? $stdin.readlines.map { |f| f.chomp } : ARGV
+argv.each do |file|
+ was = file
+ file = eval("file.#{op}")
+ File.move(was, file) unless was == file
+end
+
+
+# @@PLEAC@@_9.10
+base = File.basename(path)
+dir = File.dirname(path)
+# ruby has no fileparse equivalent
+dir, base = File.split(path)
+ext = base.scan(/\..*$/).to_s
+
+path = '/usr/lib/libc.a'
+file = File.basename(path)
+dir = File.dirname(path)
+
+puts "dir is #{dir}, file is #{file}"
+# dir is /usr/lib, file is libc.a
+
+path = '/usr/lib/libc.a'
+dir, filename = File.split(path)
+name, ext = filename.split(/(?=\.)/)
+puts "dir is #{dir}, name is #{name}, ext is #{ext}"
+# NOTE: The Ruby code prints
+# dir is /usr/lib, name is libc, extension is .a
+# while the Perl code prints a '/' after the directory name
+# dir is /usr/lib/, name is libc, extension is .a
+
+# No fileparse_set_fstype() equivalent in ruby
+
+def extension(path)
+ ext = path.scan(/\..*$/).to_s
+ ext.sub(/^\./, "")
+end
+
+
+# @@PLEAC@@_9.11
+#-----------------------------
+#!/usr/bin/ruby -w
+# symirror - build spectral forest of symlinks
+
+require 'find'
+require 'fileutils'
+
+raise "usage: #{$0} realdir mirrordir" unless ARGV.size == 2
+
+srcdir,dstdir = ARGV
+srcmode = File::stat(srcdir).mode
+Dir.mkdir(dstdir, srcmode & 07777) unless test(?d, dstdir)
+
+# fix relative paths
+Dir.chdir(srcdir) {srcdir = Dir.pwd}
+Dir.chdir(dstdir) {dstdir = Dir.pwd}
+
+Find.find(srcdir) do |srcfile|
+ if test(?d, srcfile)
+ dest = srcfile.sub(/^#{srcdir}/, dstdir)
+ dmode = File::stat(srcfile).mode & 07777
+ Dir.mkdir(dest, dmode) unless test(?d, dest)
+ a = Dir["#{srcfile}/*"].reject{|f| test(?d, f)}
+ FileUtils.ln_s(a, dest)
+ end
+end
+
+
+# @@PLEAC@@_9.12
+# we use the Getopt/Declare library here for convenience:
+# http://raa.ruby-lang.org/project/getoptdeclare/
+#-----------------------------
+#!/usr/bin/ruby -w
+# lst - list sorted directory contents (depth first)
+
+require 'find'
+require 'etc'
+require "Getopt/Declare"
+
+# Note: in the option-spec below there must be at least one hard
+# tab in between each -option and its description. For example
+# -i <tab> read from stdin
+
+opts = Getopt::Declare.new(<<'EOPARAM')
+ ============
+ Input Format:
+ -i read from stdin
+ ============
+ Output Format:
+ -l long listing
+ -r reverse listing
+ ============
+ Sort on: (one of)
+ -m mtime (modify time - default)
+ {$sort_criteria = :mtime}
+ -u atime (access time)
+ {$sort_criteria = :atime}
+ -c ctime (inode change time)
+ {$sort_criteria = :ctime}
+ -s size
+ {$sort_criteria = :size}
+ [mutex: -m -u -c -s]
+
+EOPARAM
+
+$sort_criteria ||= :mtime
+files = {}
+DIRS = opts['-i'] ? $stdin.readlines.map{|f|f.chomp!} : ARGV
+DIRS.each do |dir|
+ Find.find(dir) do |ent|
+ files[ent] = File::stat(ent)
+ end
+end
+entries = files.keys.sort_by{|f| files[f].send($sort_criteria)}
+entries = entries.reverse unless opts['-r']
+
+entries.each do |ent|
+ unless opts['-l']
+ puts ent
+ next
+ end
+ stats = files[ent]
+ ftime = stats.send($sort_criteria == :size ? :mtime : $sort_criteria)
+ printf "%6d %04o %6d %8s %8s %8d %s %s\n",
+ stats.ino,
+ stats.mode & 07777,
+ stats.nlink,
+ ETC::PASSWD[stats.uid].name,
+ ETC::GROUP[stats.gid].name,
+ stats.size,
+ ftime.strftime("%a %b %d %H:%M:%S %Y"),
+ ent
+end
+
+
+# @@PLEAC@@_10.0
+def hello
+ $greeted += 1 # in Ruby, a variable beginning with $ is global (can be any type of course)
+ puts "hi there!"
+end
+
+# We need to initialize $greeted before it can be used, because "+=" is waiting a Numeric object
+$greeted = 0
+hello # note that appending () is optional to function calls with no parameters
+
+
+# @@PLEAC@@_10.1
+# In Ruby, parameters are named anyway
+def hypotenuse(side1, side2)
+ Math.sqrt(side1**2 + side2**2) # the sqrt function comes from the Math module
+end
+diag = hypotenuse(3, 4)
+
+puts hypotenuse(3, 4)
+
+a = [3, 4]
+print hypotenuse(*a) # the star operator will magically convert an Array into a "tuple"
+
+both = men + women
+
+# In Ruby, all objects are references, so the same problem arises; we then return a new object
+nums = [1.4, 3.5, 6.7]
+def int_all(n)
+ n.collect { |v| v.to_i }
+end
+ints = int_all(nums)
+
+nums = [1.4, 3.5, 6.7]
+def trunc_em(n)
+ n.collect! { |v| v.to_i } # the bang-version of collect modifies the object
+end
+trunc_em(nums)
+
+# Ruby has two chomp version:
+# ``chomp'' chomps the record separator and returns what's expected
+# ``chomp!'' does the same but also modifies the parameter object
+
+
+# @@PLEAC@@_10.2
+def somefunc
+ variable = something # variable is local by default
+end
+
+name, age = ARGV
+start = fetch_time
+
+a, b = pair # will succeed if pair is an Array object (like ARGV is)
+c = fetch_time
+
+# In ruby, run_check can't access a, b, or c until they are
+# explicitly defined global (using leading $), even if they are
+# both defined in the same scope
+
+def check_x(x)
+ y = "whatever"
+ run_check
+ if $condition
+ puts "got $x"
+ end
+end
+
+# The following will keep a reference to the array, though the
+# results will be slightly different from perl: the last element
+# of $global_array will be itself an array
+def save_array(ary)
+ $global_array << ary
+end
+
+# The following gives the same results as in Perl for $global_array,
+# though it doesn't illustrate anymore the way to keep a reference
+# to an object: $global_array is extended with the elements of ary
+def save_array(ary)
+ $global_array += ary
+end
+
+
+# @@PLEAC@@_10.3
+# In Ruby, AFAIK a method cannot access "local variables" defined
+# upper scope; mostly because everything is an object, so you'll
+# do the same by defining an attribute or a static attribute
+
+# In Ruby the BEGIN also exists:
+BEGIN { puts "hello from BEGIN" }
+puts "hello from main"
+BEGIN { puts "hello from 2nd BEGIN" }
+# gives:
+# hello from BEGIN
+# hello from 2nd BEGIN
+# hello from main
+
+# In Ruby, it can be written as a static method and a static
+# variable
+class Counter
+ @@counter = 0
+ def Counter.next_counter; @@counter += 1; end
+end
+
+# There is no need of BEGIN since the variable will get
+# initialized when parsing
+class Counter
+ @@counter = 42
+ def Counter.next_counter; @@counter += 1; end
+ def Counter.prev_counter; @@counter -= 1; end
+end
+
+
+# @@PLEAC@@_10.4
+# You can either get the whole trace as an array of strings, each
+# string telling which file, line and method is calling:
+caller
+
+# ...or only the last caller
+caller[0]
+
+# We need to extract just the method name of the backtrace:
+def whoami; caller()[0] =~ /in `([^']+)'/ ? $1 : '(anonymous)'; end
+def whowasi; caller()[1] =~ /in `([^']+)'/ ? $1 : '(anonymous)'; end
+
+
+# @@PLEAC@@_10.5
+# In Ruby, every value is a reference on an object, thus there is
+# no such problem
+array_diff(array1, array2)
+
+def add_vecpair(a1, a2)
+ results = []
+ a1.each_index { |i| results << (a1[i] + a2[i]) }
+ results
+end
+a = [1, 2]
+b = [5, 8]
+c = add_vecpair(a, b)
+p c
+
+# Add this to the beginning of the function to check if we were
+# given two arrays
+a1.type == Array && a2.type == Array or
+ raise "usage: add_vecpair array1 array2 (was used with: #{a1.type} #{a2.type})"
+
+
+# @@PLEAC@@_10.6
+# There is no return context in Ruby
+
+
+# @@PLEAC@@_10.7
+# Like in Perl, we need to fake with a hash, but it's dirty :-(
+def thefunc(param_args)
+ args = { 'INCREMENT' => '10s', 'FINISH' => '0', 'START' => 0 }
+ args.update(param_args)
+ if (args['INCREMENT'] =~ /m$/ )
+ # .....
+ end
+end
+
+thefunc({ 'INCREMENT' => '20s', 'START' => '+5m', 'FINISH' => '+30m' })
+thefunc({})
+
+
+# @@PLEAC@@_10.8
+# there is no "undef" direct equivalent but there is the slice equiv:
+a, c = func.indexes(0, 2)
+
+
+# @@PLEAC@@_10.9
+# Ruby has no such limitation:
+def somefunc
+ ary = []
+ hash = {}
+ # ...
+ return ary, hash
+end
+arr, dict = somefunc
+
+array_of_hashes = fn
+h1, h2, h3 = fn
+
+
+# @@PLEAC@@_10.10
+return
+# or (equivalent)
+return nil
+
+
+# @@PLEAC@@_10.11
+# You can't prototype in Ruby regarding types :-(
+# Though, you can force the number of arguments:
+def func_with_no_arg; end
+def func_with_no_arg(); end
+def func_with_one_arg(a1); end
+def func_with_two_args(a1, a2); end
+def func_with_any_number_of_args(*args); end
+
+
+# @@PLEAC@@_10.12
+raise "some message" # raise exception
+
+begin
+ val = func
+rescue Exception => msg
+ $stderr.puts "func raised an exception: #{msg}"
+end
+
+# In Ruby the rescue statement uses an exception class, every
+# exception which is not matched is still continuing
+begin
+ val = func
+rescue FullMoonError
+ ...
+end
+
+
+# @@PLEAC@@_10.13
+# Saving Global Values
+# Of course we can just save the value and restore it later:
+def print_age
+ puts "Age is #{$age}"
+end
+
+$age = 18 # global variable
+print_age()
+if condition
+ safeage = $age
+ $age = 23
+ print_age()
+ $age = safeage
+end
+
+# We can also use a method that saves the global variable and
+# restores it automatically when the block is left:
+
+def local(var)
+ eval("save = #{var.id2name}")
+ begin
+ result = yield
+ ensure
+ # we want to call this even if we got an exception
+ eval("#{var.id2name} = save")
+ end
+ result
+end
+
+condition = true
+$age = 18
+print_age()
+if condition
+ local(:$age) {
+ $age = 23
+ print_age()
+ }
+end
+print_age()
+
+# There is no need to use local() for filehandles or directory
+# handles in ruby because filehandles are normal objects.
+
+
+# @@PLEAC@@_10.14
+# In Ruby you may redefine a method [but not overload it :-(]
+# just by defining again with the same name.
+def foo; puts 'foo'; end
+def foo; puts 'bar'; end
+foo
+#=> bar
+
+# You can also take a reference to an existing method before
+# redefining a new one, using the `alias' keyword
+def foo; puts 'foo'; end
+alias foo_orig foo
+def foo; puts 'bar'; end
+foo_orig
+foo
+#=> foo
+#=> bar
+
+# AFAIK, there is no direct way to create a new method whose name
+# comes from a variable, so use "eval"
+colors = %w(red blue green yellow orange purple violet)
+colors.each { |c|
+ eval <<-EOS
+ def #{c}(*a)
+ "<FONT COLOR='#{c}'>" + a.to_s + "</FONT>"
+ end
+ EOS
+}
+
+
+# @@PLEAC@@_10.15
+def method_missing(name, *args)
+ "<FONT COLOR='#{name}'>" + args.join(' ') + "</FONT>"
+end
+puts chartreuse("stuff")
+
+
+# @@PLEAC@@_10.16
+def outer(arg)
+ x = arg + 35
+ inner = proc { x * 19 }
+ x + inner.call()
+end
+
+
+# @@PLEAC@@_10.17
+#!/usr/bin/ruby -w
+# mailsort - sort mbox by different criteria
+require 'English'
+require 'Date'
+
+# Objects of class Mail represent a single mail.
+class Mail
+ attr_accessor :no
+ attr_accessor :subject
+ attr_accessor :fulltext
+ attr_accessor :date
+
+ def initialize
+ @fulltext = ""
+ @subject = ""
+ end
+
+ def append(para)
+ @fulltext << para
+ end
+
+ # this is called if you call puts(mail)
+ def to_s
+ @fulltext
+ end
+end
+
+# represents a list of mails.
+class Mailbox < Array
+
+ Subjectpattern = Regexp.new('Subject:\s*(?:Re:\s*)*(.*)\n')
+ Datepattern = Regexp.new('Date:\s*(.*)\n')
+
+ # reads mails from open file and stores them
+ def read(file)
+ $INPUT_RECORD_SEPARATOR = '' # paragraph reads
+ msgno = -1
+ file.each { |para|
+ if para =~ /^From/
+ mail = Mail.new
+ mail.no = (msgno += 1)
+ md = Subjectpattern.match(para)
+ if md
+ mail.subject = md[1]
+ end
+ md = Datepattern.match(para)
+ if md
+ mail.date = DateTime.parse(md[1])
+ else
+ mail.date = DateTime.now
+ end
+ self.push(mail)
+ end
+ mail.append(para) if mail
+ }
+ end
+
+ def sort_by_subject_and_no
+ self.sort_by { |m|
+ [m.subject, m.no]
+ }
+ end
+
+ # sorts by a list of attributs of mail, given as symbols
+ def sort_by_attributs(*attrs)
+ # you can sort an Enumerable by an array of
+ # values, they would be compared
+ # from ary[0] to ary[n]t, say:
+ # ['b',1] > ['a',10] > ['a',9]
+ self.sort_by { |elem|
+ attrs.map { |attr|
+ elem.send(attr)
+ }
+ }
+ end
+
+end
+
+mailbox = Mailbox.new
+mailbox.read(ARGF)
+
+# print only subjects sorted by subject and number
+for m in mailbox.sort_by_subject_and_no
+ puts(m.subject)
+end
+
+# print complete mails sorted by date, then subject, then number
+for m in mailbox.sort_by_attributs(:date, :subject)
+ puts(m)
+end
+
+
+# @@PLEAC@@_11.7
+def mkcounter(count)
+ start = count
+ bundle = {
+ "NEXT" => proc { count += 1 },
+ "PREV" => proc { count -= 1 },
+ "RESET" => proc { count = start }
+ }
+ bundle["LAST"] = bundle["PREV"]
+ return bundle
+end
+
+c1 = mkcounter(20)
+c2 = mkcounter(77)
+
+puts "next c1: #{c1["NEXT"].call}" # 21
+puts "next c2: #{c2["NEXT"].call}" # 78
+puts "next c1: #{c1["NEXT"].call}" # 22
+puts "last c1: #{c1["PREV"].call}" # 21
+puts "last c1: #{c1["LAST"].call}" # 20
+puts "old c2: #{c2["RESET"].call}" # 77
+
+
+# @@PLEAC@@_11.15
+class Binary_tree
+ def initialize(val)
+ @value = val
+ @left = nil
+ @right = nil
+ end
+
+ # insert given value into proper point of
+ # provided tree. If no tree provided,
+ # use implicit pass by reference aspect of @_
+ # to fill one in for our caller.
+ def insert(val)
+ if val < @value then
+ if @left then
+ @left.insert(val)
+ else
+ @left = Binary_tree.new(val)
+ end
+ elsif val > @value then
+ if @right then
+ @right.insert(val)
+ else
+ @right = Binary_tree.new(val)
+ end
+ else
+ puts "double"
+ # do nothing, no double values
+ end
+ end
+
+ # recurse on left child,
+ # then show current value,
+ # then recurse on right child.
+ def in_order
+ @left.in_order if @left
+ print @value, " "
+ @right.in_order if @right
+ end
+
+ # show current value,
+ # then recurse on left child,
+ # then recurse on right child.
+ def pre_order
+ print @value, " "
+ @left.pre_order if @left
+ @right.pre_order if @right
+ end
+
+ # recurse on left child,
+ # then recurse on right child,
+ # then show current value.
+ def post_order
+ @left.post_order if @left
+ @right.post_order if @right
+ print @value, " "
+ end
+
+ # find out whether provided value is in the tree.
+ # if so, return the node at which the value was found.
+ # cut down search time by only looking in the correct
+ # branch, based on current value.
+ def search(val)
+ if val == @value then
+ return self
+ elsif val < @value then
+ return @left.search(val) if @left
+ return nil
+ else
+ return @right.search(val) if @right
+ return nil
+ end
+ end
+end
+
+# first generate 20 random inserts
+test = Binary_tree.new(0)
+for a in 0..20
+ test.insert(rand(1000))
+end
+
+# now dump out the tree all three ways
+print "Pre order: "; test.pre_order; puts ""
+print "In order: "; test.in_order; puts ""
+print "Post order: "; test.post_order; puts ""
+
+print "search?"
+while gets
+ print test.search($_.to_i)
+ print "\nsearch?"
+end
+
+
+# @@PLEAC@@_12.0
+# class and module names need to have the first letter capitalized
+module Alpha
+ NAME = 'first'
+end
+module Omega
+ NAME = 'last'
+end
+puts "Alpha is #{Alpha::NAME}, Omega is #{Omega::NAME}"
+
+# ruby doesn't differentiate between compile-time and run-time
+require 'getoptlong.rb'
+require 'getoptlong' # assumes the .rb
+require 'cards/poker.rb'
+require 'cards/poker' # assumes the .rb
+load 'cards/poker.rb' # load needs the full file name and re-reads the file on every call; require only loads it once
+
+module Cards
+ module Poker
+ @card_deck = Array.new # or @card_deck = []
+ def shuffle
+ end
+ end
+end
+
+
+# @@PLEAC@@_12.1
+# a module exports all of its functions
+module Your_Module
+ def self.function
+ # this would be called as Your_Module.function
+ end
+
+ def Your_Module.another
+ # this is the same as above, but more specific
+ end
+end
+
+# @@PLEAC@@_12.2
+begin
+ require 'nonexistent'
+rescue LoadError
+ puts "Couldn't load #{$!}" # $! contains the last error string
+end
+
+# @@PLEAC@@_12.4
+# module variables are private unless access functions are defined
+module Alpha
+ @aa = 10
+ @bb = 11
+
+ def self.put_aa
+ puts @aa
+ end
+
+ def self.bb=(val)
+ @bb = val
+ end
+end
+
+Alpha.bb = 12
+# Alpha.aa = 10 # error, no aa=method
+
+
+# @@PLEAC@@_12.5
+# caller provides a backtrace of the call stack
+module MyModule
+ def find_caller
+ caller
+ end
+
+ def find_caller2(i)
+ caller(i) # an argument limits the size of the stack returned
+ end
+end
+
+
+# @@PLEAC@@_12.6
+BEGIN {
+ $logfile = '/tmp/mylog' unless defined? $logfile
+ $LF = File.open($logfile, 'a')
+}
+
+module Logger
+ def self.logmsg(msg)
+ $LF.puts msg
+ end
+
+ logmsg('startup')
+end
+
+END {
+ Logger::logmsg('shutdown')
+ $LF.close
+}
+
+
+# @@PLEAC@@_12.7
+#-----------------------------
+# results may be different on your system
+# % ruby -e '$LOAD_PATH.each_index { |i| printf("%d %s\n", i, $LOAD_PATH[i]) }'
+#0 /usr/local/lib/site_ruby/1.6
+#1 /usr/local/lib/site_ruby/1.6/i386-linux
+#2 /usr/local/lib/site_ruby/
+#3 /usr/lib/ruby/1.6
+#4 /usr/lib/ruby/1.6/i386-linux
+#5 .
+#-----------------------------
+# syntax for sh, bash, ksh, or zsh
+#$ export RUBYLIB=$HOME/rubylib
+
+# syntax for csh or tcsh
+# % setenv RUBYLIB ~/rubylib
+#-----------------------------
+$LOAD_PATH.unshift "/projects/spectre/lib";
+
+
+# @@PLEAC@@_12.8
+# equivalents in ruby are mkmf, SWIG, or Ruby/DL depending on usage
+
+
+# @@PLEAC@@_12.9
+# no equivalent in ruby
+
+
+# @@PLEAC@@_12.10
+# no equivalent in ruby
+
+
+# @@PLEAC@@_12.11
+module FineTime
+ def self.time
+ # to be defined later
+ end
+end
+
+
+module FineTime
+ def self.time
+ "its a fine time"
+ end
+end
+
+puts FineTime.time #=> "its a fine time"
+
+
+# @@PLEAC@@_12.12
+def even_only(n)
+ raise "#{n} is not even" if (n & 1) != 0 # one way to test
+ # ...
+end
+def even_only(n)
+ $stderr.puts "#{n} is not even" if (n & 1) != 0
+ # ...
+end
+
+
+# @@PLEAC@@_12.17
+# The library archive for ruby is called Ruby Application archive,
+# or shorter RAA, and can be found at http://raa.ruby-lang.org.
+# A typical library is installed like this:
+# % gunzip some-module-4.54.tar.gz
+# % tar xf some-module-4.54.tar
+# % cd some-module-4.54
+# % ruby install.rb config
+# % ruby install.rb setup
+# get superuser privileges here if needed for next step
+# % ruby install.rb install
+
+# Some modules use a different process,
+# you should find details in the documentation
+# Here is an example of such a different process
+# % ruby extconf.rb
+# % make
+# % make install
+
+# If you want the module installed in your own directory:
+# For ruby version specific libraries
+# % ruby install.rb config --site-ruby=~/lib
+# For version independent libraries
+# % ruby install.rb config --site-ruby-common=~/lib
+
+# Information about possible options for config
+# % ruby install.rb --help
+
+# If you have your own complete distribution
+# % ruby install.rb --prefix=path=~/ruby-private
+
+
+# @@PLEAC@@_13.0
+# Classes and objects in Ruby are rather straightforward
+class Person
+ # Class variables (also called static attributes) are prefixed by @@
+ @@person_counter=0
+
+ # object constructor
+ def initialize(age, name, alive = true) # Default arg like in C++
+ @age, @name, @alive = age, name, alive # Object attributes are prefixed by '@'
+ @@person_counter += 1
+ # There is no '++' operator in Ruby. The '++'/'--' operators are in fact
+ # hidden assignments which affect variables, not objects. You cannot accomplish
+ # assignment via method. Since everything in Ruby is object, '++' and '--'
+ # contradict Ruby OO ideology. Instead '-=' and '+=' are used.
+ end
+
+ attr_accessor :name, :age # This creates setter and getter methods for @name
+ # and @age. See 13.3 for details.
+
+ # methods modifying the receiver object usually have the '!' suffix
+ def die!
+ @alive = false
+ puts "#{@name} has died at the age of #{@age}."
+ @alive
+ end
+
+ def kill(anotherPerson)
+ print @name, ' is killing ', anotherPerson.name, ".\n"
+ anotherPerson.die!
+ end
+
+ # methods used as queries
+ # usually have the '?' suffix
+ def alive?
+ @alive && true
+ end
+
+ def year_of_birth
+ Time.now.year - @age
+ end
+
+ # Class method (also called static method)
+ def Person.number_of_people
+ @@person_counter
+ end
+end
+
+# Using the class:
+# Create objects of class Person
+lecter = Person.new(47, 'Hannibal')
+starling = Person.new(29, 'Clarice', true)
+pazzi = Person.new(40, 'Rinaldo', true)
+
+# Calling a class method
+print "There are ", Person.number_of_people, " Person objects\n"
+
+print pazzi.name, ' is ', (pazzi.alive?) ? 'alive' : 'dead', ".\n"
+lecter.kill(pazzi)
+print pazzi.name, ' is ', (pazzi.alive?) ? 'alive' : 'dead', ".\n"
+
+print starling.name , ' was born in ', starling.year_of_birth, "\n"
+
+
+# @@PLEAC@@_13.1
+# If you don't need any initialisation in the constructor,
+# you don't need to write a constructor.
+class MyClass
+end
+
+class MyClass
+ def initialize
+ @start = Time.new
+ @age = 0
+ end
+end
+
+class MyClass
+ def initialize(inithash)
+ @start = Time.new
+ @age = 0
+ for key, value in inithash
+ instance_variable_set("@#{key}", value)
+ end
+ end
+end
+
+# @@PLEAC@@_13.2
+# Objects are destroyed by the garbage collector.
+# The time of destroying is not predictable.
+# The ruby garbage collector can handle circular references,
+# so there is no need to write destructor for that.
+
+# There is no direct support for destructor.
+# You can call a custom function, or more specific a proc object, when the
+# garbage collector is about to destruct the object, but it is unpredictable
+# when this occurs.
+# Also if such a finalizer object has a reference to the original object,
+# this may prevent the original object from being garbage collected.
+# Because of this problem the finalize method below is
+# a class method and not an instance method.
+# So if you need to free resources for an object, like
+# closing a socket or kill a spawned subprocess,
+# you should do it explicitly.
+
+class MyClass
+ def initialize
+ ObjectSpace.define_finalizer(self,
+ self.class.method(:finalize).to_proc)
+ end
+ def MyClass.finalize(id)
+ puts "Object #{id} dying at #{Time.new}"
+ end
+end
+
+# test code
+3.times {
+ MyClass.new
+}
+ObjectSpace.garbage_collect
+
+
+# @@PLEAC@@_13.3
+# You can write getter and setter methods in a natural way:
+class Person
+ def name
+ @name
+ end
+ def name=(name)
+ @name = name
+ end
+end
+
+# But there is a better and shorter way
+class Person
+ attr_reader :age
+ attr_writer :name
+ # attr_reader and attr_writer are actually methods in class Class
+ # which set getter and setter methods for you.
+end
+
+# There is also attr_accessor to create both setters and getters
+class Person
+ attr_accessor :age, :name
+end
+
+
+# @@PLEAC@@_13.4
+class Person
+ # Class variables (also called static attributes) are prefixed by @@
+ @@person_counter = 0
+
+ def Person.population
+ @@person_counter
+ end
+ def initialize
+ @@person_counter += 1
+ ObjectSpace.define_finalizer(self,
+ self.class.method(:finalize).to_proc)
+ end
+ def Person.finalize(id)
+ @@person_counter -= 1
+ end
+end
+people = []
+10.times {
+ people.push(Person.new)
+}
+printf("There are %d people alive", Person.population)
+
+
+FixedArray.class_max_bounds = 100
+alpha = FixedArray.new
+puts "Bound on alpha is #{alpha.max_bounds}"
+
+beta = FixedArray.new
+beta.max_bounds = 50 # calls the instance method
+beta.class.class_max_bounds = 50 # alternative, calls the class method
+puts "Bound on alpha is #{alpha.max_bounds}"
+
+class FixedArray
+ @@bounds = 7
+
+ def max_bounds
+ @@max_bounds
+ end
+ # instance method, which sets the class variable
+ def max_bounds=(value)
+ @@max_bounds = value
+ end
+ # class method. This can only be called on a class,
+ # but not on the instances
+ def FixedArray.class_max_bounds=(value)
+ @@max_bounds = value
+ end
+end
+
+
+# @@PLEAC@@_13.5
+PersonStruct = Struct.new("Person", :name, :age, :peers)
+# creates a class "Struct::Person", which is accessible with the
+# constant "PersonStruct"
+p = PersonStruct.new
+p = Struct::Person.new # alternative using the classname
+p.name = "Jason Smythe"
+p.age = 13
+p.peers = ["Wilbur", "Ralph", "Fred"]
+p[:peers] = ["Wilbur", "Ralph", "Fred"] # alternative access using symbol
+p["peers"] = ["Wilbur", "Ralph", "Fred"] # alternative access using name of field
+p[2] = ["Wilbur", "Ralph", "Fred"] # alternative access using index of field
+puts "At age #{p.age}, #{p.name}'s first friend is #{p.peers[0]}"
+
+# The fields of a struct have no special type, like other ruby variables
+# you can put any objects in. Therefore the discussions how to specify
+# the types of the fields do not apply to ruby.
+
+FamilyStruct = Struct.new("Family", :head, :address, :members)
+folks = FamilyStruct.new
+folks.head = PersonStruct.new
+dad = folks.head
+dad.name = "John"
+dad.age = 34
+
+# supply of own accessor method for the struct for error checking
+class PersonStruct
+ def age=(value)
+ if !value.kind_of?(Integer)
+ raise(ArgumentError, "Age #{value} isn't an Integer")
+ elsif value > 150
+ raise(ArgumentError, "Age #{value} is unreasonable")
+ end
+ @age = value
+ end
+end
+
+
+# @@PLEAC@@_13.6
+# The ruby Object class defines a dup and a clone method.
+# The dup method is recommended for prototype object creation.
+# The default implementation makes a shallow copy,
+# but each class can override it, for example to make a deep copy.
+
+# If you want to call 'new' directly on the instances,
+# you can create a instance method "new", which returns a new duplicate.
+# This method is distinct from the class method new.
+#
+class A
+ def new
+ dup
+ end
+end
+
+ob1 = A.new
+# later on
+ob2 = ob1.new
+
+
+# @@PLEAC@@_13.7
+methname = 'flicker'
+obj.send(methname, 10) # calls obj.flicker(10)
+
+# call three methods on the object, by name
+['start', 'run', 'stop'].each do |method_string|
+ obj.send(method_string)
+end
+
+# Another way is to create a Method object
+method_obj = obj.method('flicker')
+# And then call it
+method_obj.call(10)
+
+
+# @@PLEAC@@_13.8
+# All classes in Ruby inherit from class Object
+# and thus all objects share methods defined in this class
+
+# the class of the object
+# (Object#type is deprecated in Ruby 1.8 and gone in later versions)
+puts any_object.class
+
+# Ruby classes are actually objects of class Class and they
+# respond to methods defined in Object class as well
+
+# the superclass of this class
+puts any_object.class.superclass
+
+# ask an object whether it is an instance of particular class
+n = 4.7
+puts n.instance_of?(Float) # true
+puts n.instance_of?(Numeric) # false
+
+# ask an object whether it is an instance of class, one of the
+# superclasses of the object, or modules included in it
+puts n.kind_of?(Float) # true (the class)
+puts n.kind_of?(Numeric) # true (an ancestor class)
+puts n.kind_of?(Comparable) # true (a mixin module)
+puts n.kind_of?(String) # false
+
+# ask an object whether it can respond to a particular method
+puts n.respond_to?('+') # true
+puts n.respond_to?('length') # false
+
+# all methods an object can respond to
+'just a string'.methods.each { |m| puts m }
+
+
+# @@PLEAC@@_13.9
+# Actually any class in Ruby is inheritable
+class Person
+ attr_accessor :age, :name
+ def initialize
+ @name
+ @age
+ end
+end
+#-----------------------------
+dude = Person.new
+dude.name = 'Jason'
+dude.age = 23
+printf "%s is age %d.\n", dude.name, dude.age
+#-----------------------------
+# Inheriting from Person
+class Employee < Person
+ attr_accessor :salary
+end
+#-----------------------------
+empl = Employee.new
+empl.name = 'Jason'
+empl.age = 23
+empl.salary = 200
+printf "%s is age %d, the salary is %d.\n", empl.name, empl.age, empl.salary
+#-----------------------------
+# Any built-in class can be inherited the same way
+class WeirdString < String
+ def initialize(obj)
+ super obj
+ end
+ def +(anotherObj) # + method in this class is overridden
+ # to return the sum of string lengths
+ self.length + anotherObj.length # 'self' can be omitted
+ end
+end
+#-----------------------------
+a = WeirdString.new('hello')
+b = WeirdString.new('bye')
+
+puts a + b # the overridden +
+#=> 8
+puts a.length # method from the superclass, String
+#=> 5
+
+
+# @@PLEAC@@_13.11
+# In ruby you can override the method_missing method
+# to have a solution similar to perls AUTOLOAD.
+class Person
+
+ def initialize
+ @ok_fields = %w(name age peers parent)
+ end
+
+ def valid_attribute?(name)
+ @ok_fields.include?(name)
+ end
+
+ def method_missing(namesymbol, *params)
+ name = namesymbol.to_s
+ return if name =~ /^A-Z/
+ if name.to_s[-1] == ('='[0]) # we have a setter
+ isSetter = true
+ name.sub!(/=$/, '')
+ end
+ if valid_attribute?(name)
+ if isSetter
+ instance_variable_set("@#{name}", *params)
+ else
+ instance_variable_get("@#{name}", *params)
+ end
+ else
+ # if no annestor is responsible,
+ # the Object class will throw a NoMethodError exception
+ super(namesymbol, *params)
+ end
+ end
+
+ def new
+ kid = Person.new
+ kid.parent = self
+ kid
+ end
+
+end
+
+dad = Person.new
+dad.name = "Jason"
+dad.age = 23
+kid = dad.new
+kid.name = "Rachel"
+kid.age = 2
+puts "Kid's parent is #{kid.parent.name}"
+puts dad
+puts kid
+
+class Employee < Person
+ def initialize
+ super
+ @ok_fields.push("salary", "boss")
+ end
+ def ok_fields
+ @ok_fields
+ end
+end
+
+
+# @@PLEAC@@_13.13
+# The Ruby garbage collector claims to cope with circular structures.
+# You can test it with this code:
+class RingNode
+ attr_accessor :next
+ attr_accessor :prev
+ attr_reader :name
+
+ def initialize(aName)
+ @name = aName
+ ObjectSpace.define_finalizer(self,
+ self.class.method(:finalize).to_proc)
+ end
+
+ def RingNode.finalize(id)
+ puts "Node #{id} dying"
+ end
+
+ def RingNode.show_all_objects
+ ObjectSpace.each_object {|id|
+ puts id.name if id.class == RingNode
+ }
+ end
+end
+
+def create_test
+ a = RingNode.new("Node A")
+ b = RingNode.new("Node B")
+ c = RingNode.new("Node C")
+ a.next = b
+ b.next = c
+ c.next = a
+ a.prev = c
+ c.prev = b
+ b.prev = a
+
+ a = nil
+ b = nil
+ c = nil
+end
+
+create_test
+RingNode.show_all_objects
+ObjectSpace.garbage_collect
+puts "After garbage collection"
+RingNode.show_all_objects
+
+
+# @@PLEAC@@_13.14
+class String
+ def <=>(other)
+ self.casecmp other
+ end
+end
+
+# There is no way to directly overload the '""' (stringify)
+# operator in Ruby. However, by convention, classes which
+# can reasonably be converted to a String will define a
+# 'to_s' method as in the TimeNumber class defined below.
+# The 'puts' method will automatically call an object's
+# 'to_s' method as is demonstrated below.
+# Furthermore, if a class defines a to_str method, an object of that
+# class can be used most any place where the interpreter is looking
+# for a String value.
+
+#---------------------------------------
+# NOTE: Ruby has a builtin Time class which would usually be used
+# to manipulate time objects, the following is supplied for
+# educational purposes to demonstrate operator overloading.
+#
+class TimeNumber
+ attr_accessor :hours,:minutes,:seconds
+ def initialize( hours, minutes, seconds)
+ @hours = hours
+ @minutes = minutes
+ @seconds = seconds
+ end
+
+ def to_s
+ return sprintf( "%d:%02d:%02d", @hours, @minutes, @seconds)
+ end
+
+ def to_str
+ to_s
+ end
+
+ def +( other)
+ seconds = @seconds + other.seconds
+ minutes = @minutes + other.minutes
+ hours = @hours + other.hours
+ if seconds >= 60
+ seconds %= 60
+ minutes += 1
+ end
+ if minutes >= 60
+ minutes %= 60
+ hours += 1
+ end
+ return TimeNumber.new(hours, minutes, seconds)
+ end
+
+ def -(other)
+ raise NotImplementedError
+ end
+
+ def *(other)
+ raise NotImplementedError
+ end
+
+ def /( other)
+ raise NotImplementedError
+ end
+end
+
+t1 = TimeNumber.new(0, 58, 59)
+sec = TimeNumber.new(0, 0, 1)
+min = TimeNumber.new(0, 1, 0)
+puts t1 + sec + min + min
+
+#-----------------------------
+# StrNum class example: Ruby's builtin String class already has the
+# capabilities outlined in StrNum Perl example, however the '*' operator
+# on Ruby's String class acts differently: It creates a string which
+# is the original string repeated N times.
+#
+# Using Ruby's String class as is in this example:
+x = "Red"; y = "Black"
+z = x+y
+r = z*3 # r is "RedBlackRedBlackRedBlack"
+puts "values are #{x}, #{y}, #{z}, and #{r}"
+print "#{x} is ", x < y ? "LT" : "GE", " #{y}\n"
+# prints:
+# values are Red, Black, RedBlack, and RedBlackRedBlackRedBlack
+# Red is GE Black
+
+#-----------------------------
+class FixNum
+ REGEX = /(\.\d*)/
+ DEFAULT_PLACES = 0
+ attr_accessor :value, :places
+ def initialize(value, places = nil)
+ @value = value
+ if places
+ @places = places
+ else
+ m = REGEX.match(value.to_s)
+ if m
+ @places = m[0].length - 1
+ else
+ @places = DEFAULT_PLACES
+ end
+ end
+ end
+
+ def +(other)
+ FixNum.new(@value + other.value, max(@places, other.places))
+ end
+
+ def *(other)
+ FixNum.new(@value * other.value, max(@places, other.places))
+ end
+
+ def /(other)
+ puts "Divide: #{@value.to_f/other.value.to_f}"
+ result = FixNum.new(@value.to_f/other.value.to_f)
+ result.places = max(result.places,other.places)
+ result
+ end
+
+ def to_s
+ sprintf("STR%s: %.*f", self.class.to_s , @places, @value) #.
+ end
+
+ def to_str
+ to_s
+ end
+
+ def to_i #convert to int
+ @value.to_i
+ end
+
+ def to_f #convert to float`
+ @value.to_f
+ end
+
+ private
+ def max(a,b)
+ a > b ? a : b
+ end
+end
+
+def demo()
+ x = FixNum.new(40)
+ y = FixNum.new(12, 0)
+
+ puts "sum of #{x} and #{y} is #{x+y}"
+ puts "product of #{x} and #{y} is #{x*y}"
+
+ z = x/y
+ puts "#{z} has #{z.places} places"
+ unless z.places
+ z.places = 2
+ end
+
+ puts "div of #{x} by #{y} is #{z}"
+ puts "square of that is #{z*z}"
+end
+
+if __FILE__ == $0
+ demo()
+end
+
+
+# @@PLEAC@@_14.1
+# There are dbm, sdbm, gdbm modules
+# and the bdb module for accessing the berkeley db
+# sdbm seem to be available on the most systems,
+# so we use it here
+#
+require "sdbm"
+SDBM.open("filename", 0666) { |dbobj|
+ # raises exception if open error
+
+ # the returned sdbm-dbobj has most of the methods of a hash
+ v = dbobj["key"]
+ dbobj["key"] = "newvalue"
+ if dbobj.has_key?("key")
+ # ...
+ end
+ dbobj.delete("key2")
+}
+# database is open only inside the block.
+
+# It is also possible to use a open .. close pair:
+dbobj = SDBM.open("filename", 0666)
+#.. do something with dbobj
+dbobj.close
+
+#!/usr/bin/ruby -w
+# userstats - generate statistics on who is logged in
+# call with usernames as argument to display the totals
+# for the given usernames, call with "ALL" to display all users
+
+require "sdbm"
+filename = '/tmp/userstats.db'
+SDBM.open(filename, 0666) { |dbobj|
+ if ARGV.length > 0
+ if ARGV[0] == "ALL"
+ # ARGV is constant, so we need the variable userlist
+ userlist = dbobj.keys().sort()
+ else
+ userlist = ARGV
+ end
+ userlist.each { |user|
+ print "#{user}\t#{dbobj[user]}\n"
+ }
+ else
+ who = `who`
+ who.split("\n").each { |line|
+ md = /^(\S+)/.match(line)
+ raise "Bad line from who: #{line}" unless md
+ # sdbm stores only strings, so "+=" doesn't work,
+ # we need to convert them expicitly back to integer.
+ if dbobj.has_key?(md[0])
+ dbobj[md[0]] = dbobj[md[0]].to_i + 1
+ else
+ dbobj[md[0]] = "1"
+ end
+ }
+ end
+}
+
+
+# @@PLEAC@@_14.2
+# using open and clear
+dbobj = SDBM.open("filename", 0666)
+dbobj.clear()
+dbobj.close()
+# deleting file and recreating it
+# the filenames depend on the flavor of dbm you use,
+# for example sdbm has two files named filename.pag and filename.dir,
+# so you need to delete both files
+begin
+  # sdbm keeps its data in two files, so both have to be removed;
+  # a plain "filename" does not exist on disk
+  File.delete("filename.pag", "filename.dir")
+  # raises an exception if a file does not exist
+  dbobj = SDBM.open("filename", 0666)
+rescue
+  # add error handling here
+end
+
+
+# @@PLEAC@@_14.3
+# sdbm2gdbm: converts sdbm database to a gdbm database
+require "sdbm"
+require "gdbm"
+
+unless ARGV.length == 2
+ fail "usage: sdbm2gdbm infile outfile"
+end
+infile = ARGV[0]
+outfile = ARGV[1]
+
+sdb = SDBM.open(infile)
+gdb = GDBM.open(outfile, 0666)
+sdb.each { |key, val|
+ gdb[key] = val
+}
+gdb.close
+sdb.close
+
+
+# @@PLEAC@@_14.4
+#!/usr/bin/ruby -w
+# dbmmerge: merges two dbm databases
+require "sdbm"
+
+unless ARGV.length == 3
+ fail "usage: dbmmerge indb1 indb2 outdb"
+end
+infile1 = ARGV[0]
+infile2 = ARGV[0]
+outfile = ARGV[2]
+
+in1 = SDBM.open(infile1, nil)
+in2 = SDBM.open(infile2, nil)
+outdb = SDBM.open(outfile, 0666)
+
+[in1, in2].each { |indb|
+ indb.each { |key, val|
+ if outdb.has_key?(key)
+ # decide which value to set.
+ # set outdb[key] if necessary
+ else
+ outdb[key] = val
+ end
+ }
+}
+in1.close
+in2.close
+outdb.close
+
+
+# @@PLEAC@@_14.7
+# we write a tie method that extends the Array class.
+# It reads the file into the memory, executes the code block
+# in which you can manipulate the array as needed, and writes
+# the array back to the file after the end of the block execution
+class Array
+ def tie(filename, flags)
+ File.open(filename, flags) { |f|
+ f.each_line { |line|
+ self.push(line.chomp)
+ }
+ yield
+ f.rewind
+ each { |line|
+ if line
+ f.puts(line)
+ else
+ f.puts ""
+ end
+ }
+ }
+ end
+end
+
+array = Array.new
+array.tie("/tmp/textfile.txt", File::RDWR|File::CREAT) {
+ array[4] = "a new line 4"
+}
+
+# The tied array can be manipulated like a normal array,
+# so there is no need for a special API, and a recno_demo program
+# to demonstrate its API is unnecessary
+
+
+# tied array demo: show how to use array with a tied file
+filename = "db_file.txt"
+lines = Array.new
+File.unlink(filename) if File.exists?(filename)
+lines.tie(filename, File::RDWR | File::CREAT) {
+ # first create a textfile to play with
+ lines[0] = "zero"
+ lines[1] = "one"
+ lines[2] = "two"
+ lines[3] = "three"
+ lines[4] = "four"
+
+ # print the records in order.
+ # Opposed to perl, the tied array behaves exactly as a normal array
+ puts "\nOriginal"
+ for i in 0..(lines.length-1)
+ puts "#{i}: #{lines[i]}"
+ end
+
+ #use push and pop
+ a = lines.pop
+ lines.push("last")
+ puts("The last line was [#{a}]")
+
+ #use shift and unshift
+ a = lines.shift
+ lines.unshift("first")
+ puts("The first line was [#{a}]")
+
+ # add record after record 2
+ i = 2
+ lines.insert(i + 1, "Newbie")
+
+ # add record before record one
+ i = 1
+ lines.insert(i, "New One")
+
+ # delete record 3
+ lines.delete_at(3)
+
+ #now print the records in reverse order
+ puts "\nReverse"
+ (lines.length - 1).downto(0){ |i|
+ puts "#{i}: #{lines[i]}"
+ }
+
+}
+
+
+# @@PLEAC@@_14.8
+# example to store complex data in a database
+# uses Marshal from the standard library
+require "sdbm"
+db = SDBM.open("pleac14-8-database", 0666)
+
+# convert the Objects into strings and back by using the Marshal module.
+# Most normal objects can be converted out of the box,
+# but not special things like procedure objects,
+# IO instance variables, singleton objects
+
+db["Tom Christiansen"] = Marshal.dump(["book author", "tchrist@perl.com"])
+db["Tom Boutell"] = Marshal.dump(["shareware author",
+"boutell@boutell.com"])
+
+name1 = "Tom Christiansen"
+name2 = "Tom Boutell"
+
+tom1 = Marshal.load(db[name1])
+tom2 = Marshal.load(db[name2])
+
+puts "Two Toming: #{tom1} #{tom2}"
+
+if tom1[0] == tom2[0] && tom1[1] == tom2[1]
+ puts "You're having runtime fun with one Tom made two."
+else
+ puts "No two Toms are ever alike"
+end
+
+# To change parts of an entry, get the whole entry, change the parts,
+# and save the whole entry back
+entry = Marshal.load(db["Tom Boutell"])
+entry[0] = "Poet Programmer"
+db["Tom Boutell"] = Marshal.dump(entry)
+db.close
+
+
+# @@PLEAC@@_14.9
+# example to make data persistent
+# uses Marshal from the standard lib
+# Stores the data in a simple file,
+# see 14.8 on how to store it in a dbm file
+
+# The BEGIN block is executed before the rest of the script
+# we use global variables here because local variables
+# will go out of scope and are not accessible from the main script
+
+BEGIN {
+ $persistent_store = "persitence.dat"
+ begin
+ File.open($persistent_store) do |f|
+ $stringvariable1 = Marshal.load(f)
+ $arrayvariable2 = Marshal.load(f)
+ end
+ rescue
+ puts "Can not open #{$persistent_store}"
+ # Initialisation if this script runs the first time
+ $stringvariable1 = ""
+ $arrayvariable2 = []
+ end
+}
+
+END {
+ File.open($persistent_store, "w+") do |f|
+ Marshal.dump($stringvariable1, f)
+ Marshal.dump($arrayvariable2, f)
+ end
+}
+
+# simple test program
+puts $stringvariable1
+puts $arrayvariable2
+$stringvariable1 = "Hello World"
+$arrayvariable2.push(5)
+puts $stringvariable1
+puts $arrayvariable2
+
+
+# @@PLEAC@@_14.10
+#!/usr/bin/ruby -w
+# Ruby has a dbi module with an architecture similar
+# to the Perl dbi module: the dbi module provides an unified
+# interface and uses specialized drivers for each dbms vendor
+#
+begin
+ DBI.connect("DBI:driver:driverspecific", "username", "auth") {
+ |dbh|
+
+ dbh.do(SQL1)
+
+ dbh.prepare(SQL2){ |sth|
+ sth.execute
+ sth.fetch {|row|
+ # ...
+ }
+ } # end of block finishes the statement handle
+ } # end of block closes the database connection
+rescue DBI::DatabaseError => e
+ puts "dbi error occurred"
+ puts "Error code: #{e.err}"
+ puts "Error message: #{e.errstr}"
+end
+
+#!/usr/bin/ruby -w
+# dbusers - example for mysql which creates a table,
+# fills it with values, retrieves the values back,
+# and finally destroys the table.
+
+require "dbi"
+
+# replacement for the User::pwent module
+def getpwent
+ result = []
+ File.open("/etc/passwd") {|file|
+ file.each_line {|line|
+ next if line.match(/^#/)
+ cols = line.split(":")
+ result.push([cols[2], cols[0]])
+ }
+ }
+ result
+end
+
+begin
+ DBI.connect("DBI:Mysql:pleacdatabase", "pleac", "pleacpassword") {
+ |conn|
+
+ conn.do("CREATE TABLE users (uid INT, login CHAR(8))")
+
+ users = getpwent
+
+ conn.prepare("INSERT INTO users VALUES (?,?)") {|sth|
+ users.each {|entry|
+ sth.execute(entry[0], entry[1])
+ }
+ }
+
+ conn.execute("SELECT uid, login FROM users WHERE uid < 50") {|sth|
+ sth.fetch {|row|
+ puts row.collect {|col|
+ if col.nil?
+ "(null)"
+ else
+ col
+ end
+ }.join(", ")
+ }
+ }
+
+ conn.do("DROP TABLE users")
+ }
+rescue DBI::DatabaseError => e
+ puts "dbi error occurred"
+ puts "Error code: #{e.err}"
+ puts "Error message: #{e.errstr}"
+end
+
+
+# @@PLEAC@@_15.1
+# This test program demonstrates parsing program arguments.
+# It uses the optparse library, which is included with ruby 1.8
+# It handles classic unix style and gnu style options
+require 'optparse'
+
+@debugmode = false
+@verbose = false
+
+ARGV.options do |opts|
+ opts.banner = "Usage: ruby #{$0} [OPTIONS] INPUTFILES"
+
+ opts.on("-h", "--help", "show this message") {
+ puts opts
+ exit
+ }
+ # The OptionParser#on method is called with a specification of short
+ # options, of long options, a data type spezification and user help
+ # messages for this option.
+ # The method analyses the given parameter and decides what it is,
+ # so you can leave out the long option if you don't need it
+ opts.on("-v", "--[no-]verbose=[FLAG]", TrueClass, "run verbosly") {
+ |@verbose| # sets @verbose to true or false
+ }
+ opts.on("-D", "--DEBUG", TrueClass, "turns on debug mode" ){
+ |@debugmode| # sets @debugmode to true
+ }
+ opts.on("-c", "--count=NUMBER", Integer, "how many times we do it" ){
+ |@count| # sets @count to given integer
+ }
+ opts.on("-o", "--output=FILE", String, "file to write output to"){
+ |@outputfile| # sets @outputfile to given string
+ }
+ opts.parse!
+end
+
+# example to use the options in the main program
+puts "Verbose is on" if @verbose
+puts "Debugmode is on" if @debugmode
+puts "Outfile is #{@outputfile}" if defined? @outputfile
+puts "Count is #{@count}" if defined? @count
+ARGV.each { |param|
+ puts "Got parameter #{param}"
+}
+
+
+# @@PLEAC@@_15.4
+buf = "\0" * 8
+$stdout.ioctl(0x5413, buf)
+ws_row, ws_col, ws_xpixel, ws_ypixel = buf.unpack("S4")
+
+raise "You must have at least 20 characters" unless ws_col >= 20
+max = 0
+values = (1..5).collect { rand(20) } # generate an array[5] of rand values
+for i in values
+ max = i if max < i
+end
+ratio = Float(ws_col-12)/max # chars per unit
+for i in values
+ printf "%8.1f %s\n", i, "*" * (ratio*i)
+end
+
+# gives, for example:
+# 15.0 *******************************
+# 10.0 *********************
+# 5.0 **********
+# 14.0 *****************************
+# 18.0 **************************************
+
+
+# @@PLEAC@@_16.1
+output = `program args` # collect output into one multiline string
+# collect output into an array, one line per element
+# (a bare split would break on every whitespace, not only on newlines)
+output = `program args`.split("\n")
+
+readme = IO.popen("ls")
+output = ""
+while readme.gets do
+ output += $_
+end
+readme.close
+
+`fsck -y /dev/rsd1a` # BAD AND SCARY in Perl because it's managed by the shell
+ # I donna in Ruby ...
+
+# so the "clean and secure" version
+readme, writeme = IO.pipe
+pid = fork {
+ # child
+ $stdout = writeme
+ readme.close
+ exec('find', '..')
+}
+# parent
+Process.waitpid(pid, 0)
+writeme.close
+while readme.gets do
+ # do something with $_
+end
+
+
+# @@PLEAC@@_16.2
+status = system("xemacs #{myfile}")
+
+status = system("xemacs", myfile)
+
+system("cmd1 args | cmd2 | cmd3 >outfile")
+system("cmd args <infile >outfile 2>errfile")
+
+# stop if the command fails
+raise "$program exited funny: #{$?}" unless system("cmd", "args1", "args2")
+
+# find out whether the child was killed by a signal,
+# even if it is a SIGINT or SIGQUIT
+system(arglist)
+# Process::Status#signaled? replaces the raw bit test ($? & 127),
+# which is deprecated on current Ruby versions
+raise "program killed by signal #{$?}" if $?.signaled?
+
+pid = fork {
+ trap("SIGINT", "IGNORE")
+ exec("sleep", "10")
+}
+trap ("SIGINT") {
+ puts "Tsk tsk, no process interruptus"
+}
+Process.waitpid(pid, 0)
+
+# Ruby doesn't permit to lie to the program called by a 'system'.
+# (ie specify what return argv[0] in C, $0 in Perl/Ruby ...)
+# A (dirty) way is to create a link (under Unix), run this link and
+# erase it. Somebody has a best idea ?
+
+
+# @@PLEAC@@_16.3
+exec("archive *.data")
+
+exec("archive", "accounting.data")
+
+exec("archive accounting.data")
+
+
+# @@PLEAC@@_16.4
+# read the output of a program
+IO.popen("ls") {|readme|
+ while readme.gets do
+ # ...
+ end
+}
+# or
+readme = IO.popen("ls")
+while readme.gets do
+ # ...
+end
+readme.close
+
+# "write" in a program
+IO.popen("cmd args","w") {|pipe|
+ pipe.puts("data")
+ pipe.puts("foo")
+}
+
+# close wait for the end of the process
+read = IO.popen("sleep 10000") # child goes to sleep
+read.close # and the parent goes to lala land
+
+writeme = IO.popen("cmd args", "w")
+writeme.puts "hello" # program will get hello\n on STDIN
+writeme.close # program will get EOF on STDIN
+
+# send in a pager (eg less) all output
+$stdout = IO.popen("/usr/bin/less","w")
+print "huge string\n" * 10000
+
+
+# @@PLEAC@@_16.5
+#-----------------------------
+def head(lines = 20)
+ pid = open("|-","w")
+ if pid == nil
+ return
+ else
+ while gets() do
+ pid.print
+ lines -= 1
+ break if lines == 0
+ end
+ end
+ exit
+end
+
+head(100)
+while gets() do
+ print
+end
+#-----------------------------
+1: > Welcome to Linux, version 2.0.33 on a i686
+
+2: >
+
+3: > "The software required `Windows 95 or better',
+
+4: > so I installed Linux."
+#-----------------------------
+> 1: Welcome to Linux, Kernel version 2.0.33 on a i686
+
+> 2:
+
+> 3: "The software required `Windows 95 or better',
+
+> 4: so I installed Linux."
+#-----------------------------
+#!/usr/bin/ruby
+# qnumcat - demo additive output filters
+
+def number()
+ pid = open("|-","w")
+ if pid == nil
+ return
+ else
+ while gets() do pid.printf("%d: %s", $., $_); end
+ end
+ exit
+end
+
+def quote()
+ pid = open("|-","w")
+ if pid == nil
+ return
+ else
+ while gets() do pid.print "> #{$_}" end
+ end
+ exit
+end
+
+number()
+quote()
+
+while gets() do
+ print
+end
+$stdout.close
+exit
+
+
+# @@PLEAC@@_16.6
+ARGV.map! { |arg|
+ arg =~ /\.(gz|Z)$/ ? "|gzip -dc #{arg}" : arg
+}
+for file in ARGV
+ fh = open(file)
+ while fh.gets() do
+ # .......
+ end
+end
+#-----------------------------
+ARGV.map! { |arg|
+ arg =~ %r#^\w+://# ? "|GET #{arg}" : arg #
+}
+for file in ARGV
+ fh = open(file)
+ while fh.gets() do
+ # .......
+ end
+end
+#-----------------------------
+pwdinfo = (`domainname` =~ /^(\(none\))?$/) ? '/etc/passwd' : '|ypcat passwd';
+pwd = open(pwdinfo);
+#-----------------------------
+puts "File, please? ";
+file = gets().chomp();
+fh = open(file);
+
+
+# @@PLEAC@@_16.7
+# stdout and stderr together
+output = `cmd 2>&1` # with backticks
+# or
+ph = open("|cmd 2>&1") # with an open pipe
+while ph.gets() do end # plus a read
+#-----------------------------
+# stdout only, stderr discarded
+output = `cmd 2>/dev/null` # with backticks
+# or
+ph = open("|cmd 2>/dev/null") # with an open pipe
+while ph.gets() do end # plus a read
+#-----------------------------
+# stderr only, stdout discarded
+output = `cmd 2>&1 1>/dev/null` # with backticks
+# or
+ph = open("|cmd 2>&1 1>/dev/null") # with an open pipe
+while ph.gets() do end # plus a read
+#-----------------------------
+# stdout and stderr exchanged
+output = `cmd 3>&1 1>&2 2>&3 3>&-` # with backticks
+# or
+ph = open("|cmd 3>&1 1>&2 2>&3 3>&-") # with an open pipe
+while ph.gets() do end # plus a read
+#-----------------------------
+system("program args 1>/tmp/program.stdout 2>/tmp/program.stderr")
+#-----------------------------
+output = `cmd 3>&1 1>&2 2>&3 3>&-`
+#-----------------------------
+# what the shell does for "3>&1 1>&2 2>&3 3>&-", written as assignments
+fd3 = fd1
+fd1 = fd2
+fd2 = fd3
+fd3 = nil # closed again (Ruby has no undef value; nil plays that role)
+#-----------------------------
+system("prog args 1>tmpfile 2>&1")
+system("prog args 2>&1 1>tmpfile")
+#-----------------------------
+# system ("prog args 1>tmpfile 2>&1")
+fd1 = "tmpfile" # change stdout destination first
+fd2 = fd1 # now point stderr there, too
+#-----------------------------
+# system("prog args 2>&1 1>tmpfile")
+fd2 = fd1 # stderr same destination as stdout
+fd1 = "tmpfile" # but change stdout destination
+#-----------------------------
+# It is often better not to rely on the shell,
+# because of portability, possible security problems
+# and bigger resource usage. So, it is often better to use the open3 library.
+# See below for an example.
+# opening stdin, stdout, stderr
+require "open3"
+stdin, stdout, stderr = Open3.popen('cmd')
+
+
+# @@PLEAC@@_16.8
+#-----------------------------
+# Contrary to perl, we don't need to use a module in Ruby
+fh = Kernel.open("|" + program, "w+")
+fh.puts "here's your input\n"
+output = fh.gets()
+fh.close()
+#-----------------------------
+Kernel.open("|program", "w+") # RIGHT !
+#-----------------------------
+# Ruby has already object methods for I/O handles
+#-----------------------------
+begin
+ fh = Kernel.open("|" + program_and_options, "w+")
+rescue
+ if ($@ ~= /^open/)
+ $stderr.puts "open failed : #{$!} \n #{$@} \n"
+ break
+ end
+ raise # reraise unforseen exception
+end
+
+
+# @@PLEAC@@_16.13
+#% kill -l
+#HUP INT QUIT ILL TRAP ABRT BUS FPE KILL USR1 SEGV USR2 PIPE
+#ALRM TERM CHLD CONT STOP TSTP TTIN TTOU URG XCPU XFSZ VTALRM
+#PROF WINCH POLL PWR
+#-----------------------------
+#% ruby -e 'puts Signal.list.keys.join(" ")'
+#PWR USR1 BUS USR2 TERM SEGV KILL POLL STOP SYS TRAP IOT HUP INT #
+#WINCH XCPU TTIN CLD TSTP FPE IO TTOU PROF CHLD CONT PIPE ABRT
+#VTALRM QUIT ILL XFSZ URG ALRM
+#-----------------------------
+# After that, the perl script create an hash equivalent to Signal.list,
+# and an array. The array can be obtained by :
+signame = []
+Signal.list.each { |name, i| signame[i] = name }
+
+
+# @@PLEAC@@_16.14
+Process.kill(9, pid) # send $pid a signal 9
+Process.kill(-1, Process.getpgrp()) # send whole job a signal 1
+Process.kill("USR1", $$) # send myself a SIGUSR1
+Process.kill("HUP", pid1, pid2, pid3) # send a SIGHUP to processes in @pids
+#-----------------------------
+begin
+ Process.kill(0, minion)
+ puts "#{minion} is alive!"
+rescue Errno::EPERM # changed uid
+ puts "#{minion} has escaped my control!";
+rescue Errno::ESRCH
+ puts "#{minion} is deceased."; # or zombied
+rescue
+ puts "Odd; I couldn't check the status of #{minion} : #{$!}"
+end
+
+
+# @@PLEAC@@_16.15
+Kernel.trap("QUIT", got_sig_quit) # got_sig_quit = Proc.new { puts "Quit\n" }
+trap("PIPE", "got_sig_pipe") # def got_sig_pipe ...
+# Ruby has no ++ operator; ouch must have been initialised beforehand
+trap("INT") { ouch += 1 } # increment ouch for every SIGINT
+#-----------------------------
+trap("INT", "IGNORE") # ignore the signal INT
+#-----------------------------
+trap("STOP", "DEFAULT") # restore default STOP signal handling
+
+
+# @@PLEAC@@_16.16
+# the signal handler
+def ding
+ trap("INT", "ding")
+ puts "\aEnter your name!"
+end
+
+# prompt for name, overriding SIGINT
+def get_name
+ save = trap("INT", "ding")
+
+ puts "Kindly Stranger, please enter your name: "
+ name = gets().chomp()
+ trap("INT", save)
+ name
+end
+
+
+# @@PLEAC@@_16.21
+# implemented thanks to http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-talk/1760
+require 'timeout'
+
+# we'll do something vastly more useful than cookbook to demonstrate timeouts
+begin
+  # Timeout.timeout is the module function; the bare Kernel-level
+  # "timeout" is no longer available on current Ruby versions
+  Timeout.timeout(5) {
+    waitsec = rand(10)
+    puts "Let's see if a sleep of #{waitsec} seconds is longer than 5 seconds..."
+    system("sleep #{waitsec}")
+  }
+  puts "Timeout didn't occur"
+rescue Timeout::Error
+  puts "Timed out!"
+end
+
+
+# @@PLEAC@@_17.1
+# A basic TCP client connection
+require 'socket'
+begin
+ t = TCPSocket.new('www.ruby-lang.org', 'www')
+rescue
+ puts "error: #{$!}"
+else
+ # ... do something with the socket
+ t.print "GET / HTTP/1.0\n\n"
+ answer = t.gets(nil)
+ # and terminate the connection when we're done
+ t.close
+end
+
+# Using the evil low level socket API
+require 'socket'
+# create a socket
+s = Socket.new(Socket::AF_INET, Socket::SOCK_STREAM, 0)
+# build the address of the remote machine
+# (pack_sockaddr_in resolves the host and produces the platform's own
+# sockaddr layout, which a hand-written pack template cannot guarantee)
+sockaddr_server = Socket.pack_sockaddr_in(80, 'www.ruby-lang.org')
+# connect
+begin
+  s.connect(sockaddr_server)
+rescue
+  puts "error: #{$!}"
+else
+  # ... do something with the socket
+  s.print "GET / HTTP/1.0\n\n"
+  # and terminate the connection when we're done
+  s.close
+end
+
+# TCP connection with management of error (DNS)
+require 'socket'
+begin
+ client = TCPSocket.new('does not exists', 'www')
+rescue
+ puts "error: #{$!}"
+end
+
+# TCP connection with a time out
+require 'socket'
+require 'timeout'
+begin
+ timeout(1) do #the server has one second to answer
+ client = TCPSocket.new('www.host.com', 'www')
+ end
+rescue
+ puts "error: #{$!}"
+end
+
+
+# @@PLEAC@@_17.12
+require 'socket'
+
+class Preforker
+ attr_reader (:child_count)
+
+ def initialize(prefork, max_clients_per_child, port, client_handler)
+ @prefork = prefork
+ @max_clients_per_child = max_clients_per_child
+ @port = port
+ @child_count = 0
+
+ @reaper = proc {
+ trap('CHLD', @reaper)
+ pid = Process.wait
+ @child_count -= 1
+ }
+
+ @huntsman = proc {
+ trap('CHLD', 'IGNORE')
+ trap('INT', 'IGNORE')
+ Process.kill('INT', 0)
+ exit
+ }
+
+ @client_handler=client_handler
+ end
+
+ def child_handler
+ trap('INT', 'EXIT')
+ @client_handler.setUp
+ # wish: sigprocmask UNblock SIGINT
+ @max_clients_per_child.times {
+ client = @server.accept or break
+ @client_handler.handle_request(client)
+ client.close
+ }
+ @client_handler.tearDown
+ end
+
+ def make_new_child
+ # wish: sigprocmask block SIGINT
+ @child_count += 1
+ pid = fork do
+ child_handler
+ end
+ # wish: sigprocmask UNblock SIGINT
+ end
+
+ def run
+ @server = TCPserver.open(@port)
+ trap('CHLD', @reaper)
+ trap('INT', @huntsman)
+ loop {
+ (@prefork - @child_count).times { |i|
+ make_new_child
+ }
+ sleep .1
+ }
+ end
+end
+
+#-----------------------------
+#!/usr/bin/ruby
+
+require 'Preforker'
+
+class ClientHandler
+ def setUp
+ end
+
+ def tearDown
+ end
+
+ def handle_request(client)
+ # do stuff
+ end
+end
+
+server = Preforker.new(1, 100, 3102, ClientHandler.new)
+server.run
+
+
+# @@PLEAC@@_18.2
+require 'net/ftp'
+
+begin
+  ftp = Net::FTP::new("ftp.host.com")
+  ftp.login(username,password)
+  ftp.chdir(directory)
+  ftp.get(filename)
+  ftp.put(filename)
+rescue Net::FTPError
+  # Interpolate the exception; String#+ does not accept an exception object.
+  $stderr.print "FTP failed: #{$!}"
+ensure
+  ftp.close() if ftp
+end
+
+# A better solution for local use: Net::FTP.open passes the connection to
+# the block and closes it when the block finishes (Net::FTP.new takes no
+# block).
+Net::FTP.open("ftp.host.com") do |ftp|
+  ftp.login(username,password)
+  ftp.chdir(directory)
+  ftp.get(filename)
+  ftp.put(filename)
+end
+
+# If you have only one file to get, there is a simple solution:
+require 'open-uri'
+open("ftp://www.ruby-lang.org/path/filename") do |fh|
+  # read from filehandle fh
+end
+#--------------------------------------------
+# to wait a defined time for the connection,
+# use the timeout module
+require 'timeout'
+begin
+  Timeout.timeout(30) {
+    ftp = Net::FTP::new("ftp.host.com")
+    ftp.debug_mode = true
+  }
+rescue Net::FTPError
+  $stderr.puts "Couldn't connect."
+rescue Timeout::Error
+  $stderr.puts "Timeout while connecting to server."
+end
+
+begin
+  ftp.login()
+rescue Net::FTPError
+  $stderr.print "Couldn't authentificate.\n"
+end
+
+begin
+  ftp.login(username)
+rescue Net::FTPError
+  $stderr.print "Still couldn't authenticate.\n"
+end
+
+begin
+  ftp.login(username, password)
+rescue Net::FTPError
+  $stderr.print "Couldn't authenticate, even with explicit
+ username and password.\n"
+end
+
+begin
+  ftp.login(username, password, account)
+rescue Net::FTPError
+  $stderr.print "No dice. It hates me.\n"
+end
+#-----------------------------
+ftp.put(localfile, remotefile)
+#-----------------------------
+# Sending data from STDIN is not directly supported
+# by the ftp library module. A possible way to do it is to use the
+# storlines method directly to send raw commands to the ftp server.
+#-----------------------------
+ftp.get(remotefile, localfile)
+#-----------------------------
+ftp.get(remotefile) { |data| puts data }
+#-----------------------------
+ftp.chdir("/pub/ruby")
+print "I'm in the directory ", ftp.pwd(), "\n"
+#-----------------------------
+ftp.mkdir("/pub/ruby/new_dir")
+#-----------------------------
+lines = ftp.ls("/pub/ruby/")
+# => ["drwxr-xr-x 2 matz users 4096 July 17 1998 1.0", ... ]
+
+latest = ftp.dir("/pub/ruby/*.tgz").sort.last
+
+ftp.nlst("/pub/ruby")
+# => ["/pub/ruby/1.0", ... ]
+#-----------------------------
+ftp.quit()
+
+
+# @@PLEAC@@_18.6
+require 'net/telnet'
+t = Net::Telnet::new( "Timeout" => 10,
+ "Prompt" => /%/,
+ "Host" => host )
+t.login(username, password)
+files = t.cmd("ls")
+t.print("top")
+process_string = t.waitfor(/\d+ processes/)
+t.close
+#-----------------------------
+/[$%#>] \z/n
+#-----------------------------
+# In case of an error, the telnet module throws an exception.
+# For control of the behavior in case of an error,
+# you just need to catch the exceptions and do your custom
+# error handling.
+#-----------------------------
+begin
+ telnet.login(username, password)
+rescue TimeoutError
+ fail "Login failed !\n"
+end
+#-----------------------------
+telnet.waitfor('/--more--/')
+#-----------------------------
+telnet.waitfor(String => 'greasy smoke', Timeout => 30)
+
+
+# @@PLEAC@@_18.7
+require 'ping'
+
+puts "#{host} is alive.\n" if Ping.pingecho(host);
+#-----------------------------
+# the ping module only use TCP ping, not ICMP even if we are root
+if Ping.pingecho("kingkong.com")
+ puts "The giant ape lives!\n";
+else
+ puts "All hail mighty Gamera, friend of children!\n";
+end
+
+
+# @@PLEAC@@_19.1
+#!/usr/local/bin/ruby -w
+# hiweb - load CGI class to decode information given by web server
+
+require 'cgi'
+
+cgi = CGI.new('html3')
+
+# get a parameter from a form
+value = cgi.params['PARAM_NAME'][0]
+
+# output a document
+cgi.out {
+ cgi.html {
+ cgi.head { cgi.title { "Howdy there!" } } +
+ cgi.body { cgi.p { "You typed: " + cgi.tt {
+ CGI.escapeHTML(value) } } }
+ }
+}
+
+require 'cgi'
+cgi = CGI.new
+who = cgi.param["Name"][0] # first param in list
+phone = cgi.param["Number"][0]
+picks = cgi.param["Choices"] # complete list
+
+print cgi.header( 'type' => 'text/plain',
+ 'expires' => Time.now + (3 * 24 * 60 * 60) )
+
+
+# @@PLEAC@@_19.3
+#!/usr/local/bin/ruby -w
+# webwhoami - show web user's id
+require 'etc'
+print "Content-Type: text/plain\n\n"
+print "Running as " + Etc.getpwuid.name + "\n"
+
+# % ruby -wc cgi-script # just check syntax
+
+# % ruby -w cgi-script # params from stdin
+# (offline mode: enter name=value pairs on standard input)
+# name=joe
+# number=10
+# ^D
+
+# % ruby -w cgi-script name=joe number=10 # run with mock form input
+# % ruby -d cgi-script name=joe number=10 # ditto, under the debugger
+
+# POST method script in csh
+# % (setenv HTTP_METHOD POST; ruby -w cgi-script name=joe number=10)
+# POST method script in sh
+# % HTTP_METHOD=POST ruby -w cgi-script name=joe number=10
+
+
+# @@PLEAC@@_19.4
+# ruby has several security levels, the level "1" is similar to perls taint mode.
+# It can be switched on by providing the -T command line parameter
+# or by setting $SAFE to 1. Setting $SAFE to 2,3 or 4 restricts possible
+# harmful operations further.
+
+#!/usr/bin/ruby -T
+$SAFE = 1
+File.open(ARGV[0], "w")
+# ruby warns with:
+# taint1.rb:2:in `initialize': Insecure operation - initialize (SecurityError)
+
+$SAFE = 1
+file = ARGV[0]
+unless /^([\w.-]+)$/.match(file)
+ raise "filename #{file} has invalid characters"
+end
+file = $1
+# In ruby, even the back reference from a regular expression stays tainted.
+# you need to explicitly untaint the variable:
+file.untaint
+File.open(file, "w")
+
+# Race condition exists like in perl:
+unless File.exists(filename) # Wrong because of race condition
+ File.open(filename, "w")
+end
+
+
+
+# @@PLEAC@@_19.10
+preference_value = cgi.cookies["preference name"][0]
+
+# Read the clock once so every field of the expiry time comes from the same
+# instant (six separate Time.now calls can straddle a minute/day/year change).
+now = Time.now
+packed_cookie = CGI::Cookie.new("name"    => "preference name",
+                                "value"   => "whatever you'd like",
+                                "expires" => Time.local(now.year + 2,
+                                  now.mon, now.day, now.hour, now.min, now.sec) )
+
+cgi.header("cookie" => [packed_cookie])
+
+#!/usr/local/bin/ruby -w
+# ic_cookies - sample CGI script that uses a cookie
+require 'cgi'
+
+cgi = CGI.new('html3')
+
+cookname = "favorite ice cream"
+favorite = cgi.params["flavor"][0]
+tasty = cgi.cookies[cookname][0] || 'mint'
+
+unless favorite
+ cgi.out {
+ cgi.html {
+ cgi.head { cgi.title { "Ice Cookies" } } +
+ cgi.body {
+ cgi.h1 { "Hello Ice Cream" } +
+ cgi.hr +
+ cgi.form {
+ cgi.p { "Please select a flavor: " +
+ cgi.text_field("flavor", tasty ) }
+ } +
+ cgi.hr
+ }
+ }
+ }
+else
+ cookie = CGI::Cookie.new( "name" => cookname,
+ "value" => favorite,
+ "expires" => Time.local(Time.now.year + 2,
+Time.now.mon, Time.now.day, Time.now.hour, Time.now.min, Time.now.sec) )
+ cgi.out("cookie" => [cookie]) {
+ cgi.html {
+ cgi.head { cgi.title { "Ice Cookies" } } +
+ cgi.body {
+ cgi.h1 { "Hello Ice Cream" } +
+ cgi.p { "You chose as your favorite flavor `#{favorite}'." }
+ }
+ }
+ }
+end
+
+
+# @@PLEAC@@_20.9
+def templatefile(filename, fillings)
+ aFile = File.new(filename, "r")
+ text = aFile.read()
+ aFile.close()
+ pattern = Regexp.new('%%(.*?)%%')
+ text.gsub!(pattern) {
+ fillings[$1] || ""
+ }
+ text
+end
+
+fields = {
+ 'username' => whats_his_name,
+ 'count' => login_count,
+ 'total' => minutes_used
+}
+puts templatefile('simple.template', fields)
+
+# @@INCOMPLETE@@
+# An example using databases is missing
+
diff --git a/bench/example.ruby b/bench/example.ruby new file mode 100644 index 0000000..c89d3ab --- /dev/null +++ b/bench/example.ruby @@ -0,0 +1,10070 @@ +module CodeRay
+ module Scanners
+
+class Ruby < Scanner # Scanner subclass for Ruby source text; next_token returns one Token per call.
+
+ RESERVED_WORDS = [ # identifiers that IDENT_KIND maps to :reserved
+ 'and', 'def', 'end', 'in', 'or', 'unless', 'begin',
+ 'defined?', 'ensure', 'module', 'redo', 'super', 'until',
+ 'BEGIN', 'break', 'do', 'next', 'rescue', 'then',
+ 'when', 'END', 'case', 'else', 'for', 'retry',
+ 'while', 'alias', 'class', 'elsif', 'if', 'not', 'return',
+ 'undef', 'yield',
+ ]
+
+ DEF_KEYWORDS = ['def'] # reserved word mapped to the :def_expected state
+ MODULE_KEYWORDS = ['class', 'module'] # reserved words mapped to the :module_expected state
+ DEF_NEW_STATE = WordList.new(:initial).
+ add(DEF_KEYWORDS, :def_expected).
+ add(MODULE_KEYWORDS, :module_expected) # reserved word => next @state (assumes WordList.new's argument is the value for unlisted words -- TODO confirm)
+
+ WORDS_ALLOWING_REGEXP = [ # reserved words after which @regexp_allowed is set to :set
+ 'and', 'or', 'not', 'while', 'until', 'unless', 'if', 'elsif', 'when'
+ ]
+ REGEXP_ALLOWED = WordList.new(false).
+ add(WORDS_ALLOWING_REGEXP, :set) # looked up in next_token for every reserved word
+
+ PREDEFINED_CONSTANTS = [ # identifiers that IDENT_KIND maps to :pre_constant
+ 'nil', 'true', 'false', 'self',
+ 'DATA', 'ARGV', 'ARGF', '__FILE__', '__LINE__',
+ ]
+
+ IDENT_KIND = WordList.new(:ident).
+ add(RESERVED_WORDS, :reserved).
+ add(PREDEFINED_CONSTANTS, :pre_constant) # token kind for a scanned identifier
+
+ METHOD_NAME = / #{IDENT} [?!]? /xo # IDENT is not defined in this class; presumably it comes from Scanner -- TODO confirm
+ METHOD_NAME_EX = /
+ #{METHOD_NAME} # common methods: split, foo=, empty?, gsub!
+ | \*\*? # multiplication and power
+ | [-+~]@? # plus, minus
+ | [\/%&|^`] # division, modulo or format strings, &and, |or, ^xor, `system`
+ | \[\]=? # array getter and setter
+ | <=?>? | >=? # comparison, rocket operator
+ | << | >> # append or shift left, shift right
+ | ===? # simple equality and case equality
+ /ox
+ GLOBAL_VARIABLE = / \$ (?: #{IDENT} | \d+ | [~&+`'=\/,;_.<>!@0$?*":F\\] | -[a-zA-Z_0-9] ) /ox
+
+ DOUBLEQ = / " [^"\#\\]* (?: (?: \#\{.*?\} | \#(?:$")? | \\. ) [^"\#\\]* )* "? /ox
+ SINGLEQ = / ' [^'\\]* (?: \\. [^'\\]* )* '? /ox
+ STRING = / #{SINGLEQ} | #{DOUBLEQ} /ox
+ SHELL = / ` [^`\#\\]* (?: (?: \#\{.*?\} | \#(?:$`)? | \\. ) [^`\#\\]* )* `? /ox
+ REGEXP = / \/ [^\/\#\\]* (?: (?: \#\{.*?\} | \#(?:$\/)? | \\. ) [^\/\#\\]* )* \/? /ox
+
+ DECIMAL = /\d+(?:_\d+)*/ # doesn't recognize 09 as octal error
+ OCTAL = /0_?[0-7]+(?:_[0-7]+)*/
+ HEXADECIMAL = /0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*/
+ BINARY = /0b[01]+(?:_[01]+)*/
+
+ EXPONENT = / [eE] [+-]? #{DECIMAL} /ox
+ FLOAT = / #{DECIMAL} (?: #{EXPONENT} | \. #{DECIMAL} #{EXPONENT}? ) /
+ INTEGER = /#{OCTAL}|#{HEXADECIMAL}|#{BINARY}|#{DECIMAL}/
+
+ def reset # runs the inherited reset, then clears the regexp-allowed flag
+ super
+ @regexp_allowed = false
+ end
+
+ def next_token # returns the next Token, or nil once @scanner is at end of input
+ return if @scanner.eos?
+
+ kind = :error # stays :error on the branches that only consume one character with getch
+ if @scanner.scan(/\s+/) # whitespace is handled first, in every state
+ kind = :space
+ @regexp_allowed = :set if @regexp_allowed or @scanner.matched.index(?\n) # delayed flag setting
+
+ elsif @state == :def_expected
+ if @scanner.scan(/ (?: (?:#{IDENT}(?:\.|::))* | (?:@@?|$)? #{IDENT}(?:\.|::) ) #{METHOD_NAME_EX} /ox)
+ kind = :method
+ @state = :initial
+ else
+ @scanner.getch
+ end
+ @state = :initial # :def_expected lasts for one non-space token, whether or not it matched
+
+ elsif @state == :module_expected
+ if @scanner.scan(/<</)
+ kind = :operator
+ else
+ if @scanner.scan(/ (?: #{IDENT} (?:\.|::))* #{IDENT} /ox)
+ kind = :method
+ else
+ @scanner.getch
+ end
+ @state = :initial
+ end
+
+ elsif # state == :initial -- NOTE(review): no condition on this line, so the if-expression below appears to be parsed as the elsif condition; confirm this is intended
+ # IDENTIFIERS, KEYWORDS
+ if @scanner.scan(GLOBAL_VARIABLE)
+ kind = :global_variable
+ elsif @scanner.scan(/ @@ #{IDENT} /ox)
+ kind = :class_variable
+ elsif @scanner.scan(/ @ #{IDENT} /ox)
+ kind = :instance_variable
+ elsif @scanner.scan(/ __END__\n ( (?!\#CODE\#) .* )? | \#[^\n]* | =begin(?=\s).*? \n=end(?=\s|\z)(?:[^\n]*)? /mx)
+ kind = :comment
+ elsif @scanner.scan(METHOD_NAME)
+ if @last_token_dot # NOTE(review): @last_token_dot is never assigned in this class as shown
+ kind = :ident
+ else
+ matched = @scanner.matched
+ kind = IDENT_KIND[matched]
+ if kind == :ident and matched =~ /^[A-Z]/
+ kind = :constant
+ elsif kind == :reserved
+ @state = DEF_NEW_STATE[matched]
+ @regexp_allowed = REGEXP_ALLOWED[matched]
+ end
+ end
+
+ elsif @scanner.scan(STRING)
+ kind = :string
+ elsif @scanner.scan(SHELL)
+ kind = :shell
+ elsif @scanner.scan(/<<
+ (?:
+ ([a-zA-Z_0-9]+)
+ (?: .*? ^\1$ | .* )
+ |
+ -([a-zA-Z_0-9]+)
+ (?: .*? ^\s*\2$ | .* )
+ |
+ (["\'`]) (.+?) \3
+ (?: .*? ^\4$ | .* )
+ |
+ - (["\'`]) (.+?) \5
+ (?: .*? ^\s*\6$ | .* )
+ )
+ /mxo)
+ kind = :string
+ elsif @scanner.scan(/\//) and @regexp_allowed # NOTE(review): scan runs before the flag is tested, so a "/" looks consumed even when the flag is false -- verify
+ @scanner.unscan
+ @scanner.scan(REGEXP)
+ kind = :regexp
+/%(?:[Qqxrw](?:\([^)#\\\\]*(?:(?:#\{.*?\}|#|\\\\.)[^)#\\\\]*)*\)?|\[[^\]#\\\\]*(?:(?:#\{.*?\}|#|\\\\.)[^\]#\\\\]*)*\]?|\{[^}#\\\\]*(?:(?:#\{.*?\}|#|\\\\.)[^}#\\\\]*)*\}?|<[^>#\\\\]*(?:(?:#\{.*?\}|#|\\\\.)[^>#\\\\]*)*>?|([^a-zA-Z\\\\])(?:(?!\1)[^#\\\\])*(?:(?:#\{.*?\}|#|\\\\.)(?:(?!\1)[^#\\\\])*)*\1?)|\([^)#\\\\]*(?:(?:#\{.*?\}|#|\\\\.)[^)#\\\\]*)*\)?|\[[^\]#\\\\]*(?:(?:#\{.*?\}|#|\\\\.)[^\]#\\\\]*)*\]?|\{[^}#\\\\]*(?:(?:#\{.*?\}|#|\\\\.)[^}#\\\\]*)*\}?|<[^>#\\\\]*(?:(?:#\{.*?\}|#|\\\\.)[^>#\\\\]*)*>?|([^a-zA-Z\s\\\\])(?:(?!\2)[^#\\\\])*(?:(?:#\{.*?\}|#|\\\\.)(?:(?!\2)[^#\\\\])*)*\2?|\\\\[^#\\\\]*(?:(?:#\{.*?\}|#)[^#\\\\]*)*\\\\?)/
+ elsif @scanner.scan(/:(?:#{GLOBAL_VARIABLE}|#{METHOD_NAME_EX}|#{STRING})/ox)
+ kind = :symbol
+ elsif @scanner.scan(/
+ \? (?:
+ [^\s\\]
+ |
+ \\ (?:M-\\C-|C-\\M-|M-\\c|c\\M-|c|C-|M-))? (?: \\ (?: . | [0-7]{3} | x[0-9A-Fa-f][0-9A-Fa-f] )
+ )
+ /mox)
+ kind = :integer
+
+ elsif @scanner.scan(/ [-+*\/%=<>;,|&!()\[\]{}~?] | \.\.?\.? | ::? /x)
+ kind = :operator
+ @regexp_allowed = :set if @scanner.matched[-1,1] =~ /[~=!<>|&^,\(\[+\-\/\*%]\z/
+ elsif @scanner.scan(FLOAT)
+ kind = :float
+ elsif @scanner.scan(INTEGER)
+ kind = :integer
+ else
+ @scanner.getch
+ end
+ end
+
+ token = Token.new @scanner.matched, kind
+
+ if kind == :regexp
+ token.text << @scanner.scan(/[eimnosux]*/) # append any trailing regexp option letters to the token text
+ end
+
+ @regexp_allowed = (@regexp_allowed == :set) # delayed flag setting: :set becomes true for the next call, anything else false
+
+ token
+ end
+end
+
+register Ruby, 'ruby', 'rb'
+
+ end
+end
+class Set # Collection of unique objects, stored as the keys of an internal hash (@hash).
+ include Enumerable
+
+ # Creates a new set containing the given objects.
+ def self.[](*ary)
+ new(ary)
+ end
+
+ # Creates a new set containing the elements of the given enumerable
+ # object.
+ #
+ # If a block is given, the elements of enum are preprocessed by the
+ # given block.
+ def initialize(enum = nil, &block) # :yields: o
+ @hash ||= Hash.new # ||= keeps a store that was assigned before this method ran
+
+ enum.nil? and return
+
+ if block
+ enum.each { |o| add(block[o]) }
+ else
+ merge(enum)
+ end
+ end
+
+ # Gives a copy its own duplicate of the original's internal hash.
+ def initialize_copy(orig)
+ @hash = orig.instance_eval{@hash}.dup
+ end
+
+ # Returns the number of elements.
+ def size
+ @hash.size
+ end
+ alias length size
+
+ # Returns true if the set contains no elements.
+ def empty?
+ @hash.empty?
+ end
+
+ # Removes all elements and returns self.
+ def clear
+ @hash.clear
+ self
+ end
+
+ # Replaces the contents of the set with the contents of the given
+ # enumerable object and returns self.
+ def replace(enum)
+ if enum.class == self.class
+ @hash.replace(enum.instance_eval { @hash })
+ else
+ enum.is_a?(Enumerable) or raise ArgumentError, "value must be enumerable"
+ clear
+ enum.each { |o| add(o) }
+ end
+
+ self
+ end
+
+ # Converts the set to an array. The order of elements is uncertain.
+ def to_a
+ @hash.keys
+ end
+
+ def flatten_merge(set, seen = Set.new) # adds the elements of set to self, expanding nested Sets; seen holds the object_ids of the sets currently being expanded
+ set.each { |e|
+ if e.is_a?(Set)
+ if seen.include?(e_id = e.object_id)
+ raise ArgumentError, "tried to flatten recursive Set"
+ end
+
+ seen.add(e_id)
+ flatten_merge(e, seen)
+ seen.delete(e_id)
+ else
+ add(e)
+ end
+ }
+
+ self
+ end
+ protected :flatten_merge
+
+ # Returns a new set that is a copy of the set, flattening each
+ # contained set recursively.
+ def flatten
+ self.class.new.flatten_merge(self)
+ end
+
+ # Equivalent to Set#flatten, but replaces the receiver with the
+ # result in place. Returns nil if no modifications were made.
+ def flatten!
+ if detect { |e| e.is_a?(Set) }
+ replace(flatten())
+ else
+ nil
+ end
+ end
+
+ # Returns true if the set contains the given object.
+ def include?(o)
+ @hash.include?(o)
+ end
+ alias member? include?
+
+ # Returns true if the set is a superset of the given set.
+ def superset?(set)
+ set.is_a?(Set) or raise ArgumentError, "value must be a set"
+ return false if size < set.size
+ set.all? { |o| include?(o) }
+ end
+
+ # Returns true if the set is a proper superset of the given set.
+ def proper_superset?(set)
+ set.is_a?(Set) or raise ArgumentError, "value must be a set"
+ return false if size <= set.size
+ set.all? { |o| include?(o) }
+ end
+
+ # Returns true if the set is a subset of the given set.
+ def subset?(set)
+ set.is_a?(Set) or raise ArgumentError, "value must be a set"
+ return false if set.size < size
+ all? { |o| set.include?(o) }
+ end
+
+ # Returns true if the set is a proper subset of the given set.
+ def proper_subset?(set)
+ set.is_a?(Set) or raise ArgumentError, "value must be a set"
+ return false if set.size <= size
+ all? { |o| set.include?(o) }
+ end
+
+ # Calls the given block once for each element in the set, passing
+ # the element as parameter. Returns self.
+ def each
+ @hash.each_key { |o| yield(o) }
+ self
+ end
+
+ # Adds the given object to the set and returns self. Use +merge+ to
+ # add several elements at once.
+ def add(o)
+ @hash[o] = true
+ self
+ end
+ alias << add
+
+ # Adds the given object to the set and returns self. If the
+ # object is already in the set, returns nil.
+ def add?(o)
+ if include?(o)
+ nil
+ else
+ add(o)
+ end
+ end
+
+ # Deletes the given object from the set and returns self. Use +subtract+ to
+ # delete several items at once.
+ def delete(o)
+ @hash.delete(o)
+ self
+ end
+
+ # Deletes the given object from the set and returns self. If the
+ # object is not in the set, returns nil.
+ def delete?(o)
+ if include?(o)
+ delete(o)
+ else
+ nil
+ end
+ end
+
+ # Deletes every element of the set for which block evaluates to
+ # true, and returns self.
+ def delete_if
+ @hash.delete_if { |o,| yield(o) }
+ self
+ end
+
+ # Replaces the elements with the values the block returns for them
+ # (a destructive collect) and returns self.
+ def collect!
+ set = self.class.new
+ each { |o| set << yield(o) }
+ replace(set)
+ end
+ alias map! collect!
+
+ # Equivalent to Set#delete_if, but returns nil if no changes were
+ # made.
+ def reject!
+ n = size
+ delete_if { |o| yield(o) }
+ size == n ? nil : self
+ end
+
+ # Merges the elements of the given enumerable object to the set and
+ # returns self.
+ def merge(enum)
+ if enum.is_a?(Set)
+ @hash.update(enum.instance_eval { @hash })
+ else
+ enum.is_a?(Enumerable) or raise ArgumentError, "value must be enumerable"
+ enum.each { |o| add(o) }
+ end
+
+ self
+ end
+
+ # Deletes every element that appears in the given enumerable object
+ # and returns self.
+ def subtract(enum)
+ enum.is_a?(Enumerable) or raise ArgumentError, "value must be enumerable"
+ enum.each { |o| delete(o) }
+ self
+ end
+
+ # Returns a new set built by merging the set and the elements of the
+ # given enumerable object.
+ def |(enum)
+ enum.is_a?(Enumerable) or raise ArgumentError, "value must be enumerable"
+ dup.merge(enum)
+ end
+ alias + | ##
+ alias union | ##
+
+ # Returns a new set built by duplicating the set, removing every
+ # element that appears in the given enumerable object.
+ def -(enum)
+ enum.is_a?(Enumerable) or raise ArgumentError, "value must be enumerable"
+ dup.subtract(enum)
+ end
+ alias difference - ##
+
+ # Returns a new set containing elements common to the set and the
+ # given enumerable object.
+ def &(enum)
+ enum.is_a?(Enumerable) or raise ArgumentError, "value must be enumerable"
+ n = self.class.new
+ enum.each { |o| n.add(o) if include?(o) }
+ n
+ end
+ alias intersection & ##
+
+ # Returns a new set containing elements exclusive between the set
+ # and the given enumerable object. (set ^ enum) is equivalent to
+ # ((set | enum) - (set & enum)).
+ def ^(enum)
+ enum.is_a?(Enumerable) or raise ArgumentError, "value must be enumerable"
+ n = dup
+ enum.each { |o| if n.include?(o) then n.delete(o) else n.add(o) end }
+ n
+ end
+
+ # Returns true if two sets are equal. The equality of each couple
+ # of elements is defined according to Object#eql?.
+ def ==(set)
+ equal?(set) and return true
+
+ set.is_a?(Set) && size == set.size or return false
+
+ hash = @hash.dup
+ set.all? { |o| hash.include?(o) }
+ end
+
+ def hash # :nodoc:
+ @hash.hash
+ end
+
+ def eql?(o) # :nodoc:
+ return false unless o.is_a?(Set)
+ @hash.eql?(o.instance_eval{@hash})
+ end
+
+ # Classifies the set by the return value of the given block and
+ # returns a hash of {value => set of elements} pairs. The block is
+ # called once for each element of the set, passing the element as
+ # parameter.
+ #
+ # e.g.:
+ #
+ # require 'set'
+ # files = Set.new(Dir.glob("*.rb"))
+ # hash = files.classify { |f| File.mtime(f).year }
+ # p hash # => {2000=>#<Set: {"a.rb", "b.rb"}>,
+ # # 2001=>#<Set: {"c.rb", "d.rb", "e.rb"}>,
+ # # 2002=>#<Set: {"f.rb"}>}
+ def classify # :yields: o
+ h = {}
+
+ each { |i|
+ x = yield(i)
+ (h[x] ||= self.class.new).add(i)
+ }
+
+ h
+ end
+
+ # Divides the set into a set of subsets according to the commonality
+ # defined by the given block.
+ #
+ # If the arity of the block is 2, elements o1 and o2 are in common
+ # if block.call(o1, o2) is true. Otherwise, elements o1 and o2 are
+ # in common if block.call(o1) == block.call(o2).
+ #
+ # e.g.:
+ #
+ # require 'set'
+ # numbers = Set[1, 3, 4, 6, 9, 10, 11]
+ # set = numbers.divide { |i,j| (i - j).abs == 1 }
+ # p set # => #<Set: {#<Set: {1}>,
+ # # #<Set: {11, 9, 10}>,
+ # # #<Set: {3, 4}>,
+ # # #<Set: {6}>}>
+ def divide(&func)
+ if func.arity == 2
+ require 'tsort'
+
+ class << dig = {} # :nodoc:
+ include TSort
+
+ alias tsort_each_node each_key
+ def tsort_each_child(node, &block)
+ fetch(node).each(&block)
+ end
+ end
+
+ each { |u|
+ dig[u] = a = []
+ each{ |v| func.call(u, v) and a << v }
+ }
+
+ set = Set.new()
+ dig.each_strongly_connected_component { |css|
+ set.add(self.class.new(css))
+ }
+ set
+ else
+ Set.new(classify(&func).values)
+ end
+ end
+
+ InspectKey = :__inspect_key__ # :nodoc:
+
+ # Returns a string containing a human-readable representation of the
+ # set. ("#<Set: {element1, element2, ...}>")
+ def inspect
+ ids = (Thread.current[InspectKey] ||= [])
+
+ if ids.include?(object_id)
+ return sprintf('#<%s: {...}>', self.class.name)
+ end
+
+ begin
+ ids << object_id
+ return sprintf('#<%s: {%s}>', self.class, to_a.inspect[1..-2])
+ ensure
+ ids.pop
+ end
+ end
+
+ def pretty_print(pp) # :nodoc:
+ pp.text sprintf('#<%s: {', self.class.name)
+ pp.nest(1) {
+ pp.seplist(self) { |o|
+ pp.pp o
+ }
+ }
+ pp.text "}>"
+ end
+
+ def pretty_print_cycle(pp) # :nodoc:
+ pp.text sprintf('#<%s: {%s}>', self.class.name, empty? ? '' : '...')
+ end
+end
+
+# SortedSet implements a set whose elements are kept in sorted order. See Set.
+class SortedSet < Set
+ @@setup = false
+
+ class << self
+ def [](*ary) # :nodoc:
+ new(ary)
+ end
+
+ def setup # :nodoc:
+ @@setup and return # the methods below are defined only once
+
+ begin
+ require 'rbtree' # optional library; a LoadError selects the fallback definitions in the rescue branch
+
+ module_eval %{
+ def initialize(*args, &block)
+ @hash = RBTree.new
+ super
+ end
+ }
+ rescue LoadError
+ module_eval %{
+ def initialize(*args, &block)
+ @keys = nil
+ super
+ end
+
+ def clear
+ @keys = nil
+ super
+ end
+
+ def replace(enum)
+ @keys = nil
+ super
+ end
+
+ def add(o)
+ @keys = nil
+ @hash[o] = true
+ self
+ end
+ alias << add
+
+ def delete(o)
+ @keys = nil
+ @hash.delete(o)
+ self
+ end
+
+ def delete_if
+ n = @hash.size
+ @hash.delete_if { |o,| yield(o) }
+ @keys = nil if @hash.size != n
+ self
+ end
+
+ def merge(enum)
+ @keys = nil
+ super
+ end
+
+ def each
+ to_a.each { |o| yield(o) }
+ end
+
+ def to_a
+ (@keys = @hash.keys).sort! unless @keys
+ @keys
+ end
+ }
+ end
+
+ @@setup = true
+ end
+ end
+
+ def initialize(*args, &block) # :nodoc:
+ SortedSet.setup # defines the real initialize via module_eval on first use
+ initialize(*args, &block) # not recursion: dispatches to the initialize that setup has just defined
+ end
+end
+
+module Enumerable
+ # Makes a set from the enumerable object by calling
+ # klass.new(self, *args, &block); klass defaults to Set.
+ def to_set(klass = Set, *args, &block)
+ klass.new(self, *args, &block)
+ end
+end
+
+# =begin
+# == RestricedSet class
+# RestricedSet implements a set with restrictions defined by a given
+# block.
+#
+# === Super class
+# Set
+#
+# === Class Methods
+# --- RestricedSet::new(enum = nil) { |o| ... }
+# --- RestricedSet::new(enum = nil) { |rset, o| ... }
+# Creates a new restricted set containing the elements of the given
+# enumerable object. Restrictions are defined by the given block.
+#
+# If the block's arity is 2, it is called with the RestrictedSet
+# itself and an object to see if the object is allowed to be put in
+# the set.
+#
+# Otherwise, the block is called with an object to see if the object
+# is allowed to be put in the set.
+#
+# === Instance Methods
+# --- restriction_proc
+# Returns the restriction procedure of the set.
+#
+# =end
+#
+# class RestricedSet < Set
+# def initialize(*args, &block)
+# @proc = block or raise ArgumentError, "missing a block"
+#
+# if @proc.arity == 2
+# instance_eval %{
+# def add(o)
+# @hash[o] = true if @proc.call(self, o)
+# self
+# end
+# alias << add
+#
+# def add?(o)
+# if include?(o) || !@proc.call(self, o)
+# nil
+# else
+# @hash[o] = true
+# self
+# end
+# end
+#
+# def replace(enum)
+# enum.is_a?(Enumerable) or raise ArgumentError, "value must be enumerable"
+# clear
+# enum.each { |o| add(o) }
+#
+# self
+# end
+#
+# def merge(enum)
+# enum.is_a?(Enumerable) or raise ArgumentError, "value must be enumerable"
+# enum.each { |o| add(o) }
+#
+# self
+# end
+# }
+# else
+# instance_eval %{
+# def add(o)
+# if @proc.call(o)
+# @hash[o] = true
+# end
+# self
+# end
+# alias << add
+#
+# def add?(o)
+# if include?(o) || !@proc.call(o)
+# nil
+# else
+# @hash[o] = true
+# self
+# end
+# end
+# }
+# end
+#
+# super(*args)
+# end
+#
+# def restriction_proc
+# @proc
+# end
+# end
+
+if $0 == __FILE__
+ eval DATA.read, nil, $0, __LINE__+4
+end
+
+# = rweb - CGI Support Library
+#
+# Author:: Johannes Barre (mailto:rweb@igels.net)
+# Copyright:: Copyright (c) 2003, 04 by Johannes Barre
+# License:: GNU Lesser General Public License (COPYING, http://www.gnu.org/copyleft/lesser.html)
+# Version:: 0.1.0
+# CVS-ID:: $Id: rweb.rb 6 2004-06-16 15:56:26Z igel $
+#
+# == What is Rweb?
+# Rweb is a replacement for the cgi class included in the ruby distribution.
+#
+# == How to use
+#
+# === Basics
+#
+# This class is made to be as easy as possible to use. An example:
+#
+# require "rweb"
+#
+# web = Rweb.new
+# web.out do
+# web.puts "Hello world!"
+# end
+#
+# The visitor will get a simple "Hello world!" in his browser. Please note
+# that Rweb won't add HTML tags for you, so you had better do something like this:
+#
+# require "rweb"
+#
+# web = Rweb.new
+# web.out do
+# web.puts "<html><body>Hello world!</body></html>"
+# end
+#
+# === Set headers
+# Of course, it's also possible to tell the browser, that the content of this
+# page is plain text instead of html code:
+#
+# require "rweb"
+#
+# web = Rweb.new
+# web.out do
+# web.header("content-type: text/plain")
+# web.puts "Hello plain world!"
+# end
+#
+# Please remember, headers can't be set after the page content has been sent.
+# You have to set all necessary headers before the first puts or print. It's
+# possible to cache the content until everything is complete. Doing it this
+# way, you can set headers everywhere.
+#
+# If you set a header twice, the second header will replace the first one. The
+# header name is not case-sensitive; it will always be converted into the
+# capitalised form suggested by the w3c (http://w3.org)
+#
+# === Set cookies
+# Setting cookies is quite easy:
+# require 'rweb'
+#
+# web = Rweb.new
+# Cookie.new("Visits", web.cookies['visits'].to_i +1)
+# web.out do
+# web.puts "Welcome back! You visited this page #{web.cookies['visits'].to_i +1} times"
+# end
+#
+# See the class Cookie for more details.
+#
+# === Get form and cookie values
+# There are four ways to submit data from the browser to the server and your
+# ruby script: via GET, POST, cookies and file upload. Rweb doesn't support
+# file upload yet.
+#
+# require 'rweb'
+#
+# web = Rweb.new
+# web.out do
+# web.print "action: #{web.get['action']} "
+# web.puts "The value of the cookie 'visits' is #{web.cookies['visits']}"
+# web.puts "The post parameter 'test['x']' is #{web.post['test']['x']}"
+# end
+
+RWEB_VERSION = "0.1.0"
+RWEB = "rweb/#{RWEB_VERSION}"
+
+#require 'rwebcookie' -> edit by bunny :-)
+
+class Rweb
+ # All parameter submitted via the GET method are available in attribute
+ # get. This is Hash, where every parameter is available as a key-value
+ # pair.
+ #
+ # If your input tag has a name like this one, it's value will be available
+ # as web.get["fieldname"]
+ # <input name="fieldname">
+ # You can submit values as a Hash
+ # <input name="text['index']">
+ # <input name="text['index2']">
+ # will be available as
+ # web.get["text"]["index"]
+ # web.get["text"]["index2"]
+ # Integers are also possible
+ # <input name="int[2]">
+ # <input name="int[3]['hi']>
+ # will be available as
+ # web.get["int"][2]
+ # web.get["int"][3]["hi"]
+ # If you specify no index, the lowest unused index will be used:
+ # <input name="int[]"><!-- First Field -->
+ # <input name="int[]"><!-- Second one -->
+ # will be available as
+ # web.get["int"][0] # First Field
+ # web.get["int"][1] # Second one
+ # Please note that this doesn't work the way you might expect:
+ # <input name="text[index]">
+ # It will not be available as web.get["text"]["index"] but
+ # web.get["text[index]"]
+ attr_reader :get
+
+ # All parameters submitted via POST are available in the attribute post. It
+ # works like the get attribute.
+ # <input name="text[0]">
+ # will be available as
+ # web.post["text"][0]
+ attr_reader :post
+
+ # All cookies submitted by the browser are available in cookies. This is a
+ # Hash, where every cookie is a key-value pair.
+ attr_reader :cookies
+
+ # The name of the browser identification is submitted as USER_AGENT and
+ # available in this attribute.
+ attr_reader :user_agent
+
+ # The IP address of the client.
+ attr_reader :remote_addr
+
+ # Creates a new Rweb object. This should only be done once. You can set various
+ # options via the settings hash.
+ #
+ # "cache" => true: Everything your script sends to the client will be cached
+ # until the end of the out block or until flush is called. This way, you
+ # can modify headers and cookies even after printing something to the client.
+ #
+ # "safe" => level: Changes the $SAFE attribute. By default, $SAFE will be set
+ # to 1. If $SAFE is already higher than this value, it won't be changed.
+ #
+ # "silend" => true: Normally, Rweb automatically adds a header like this:
+ # "X-Powered-By: Rweb/x.x.x (Ruby/y.y.y)". With the "silend" option you can
+ # suppress this.
+ def initialize (settings = {})
+ # {{{
+ @header = {}
+ @cookies = {}
+ @get = {}
+ @post = {}
+
+ # Internal attributes
+ @status = nil
+ @reasonPhrase = nil
+ @setcookies = []
+ @output_started = false;
+ @output_allowed = false;
+
+ @mod_ruby = false
+ @env = ENV.to_hash
+
+ if defined?(MOD_RUBY)
+ @output_method = "mod_ruby"
+ @mod_ruby = true
+ elsif @env['SERVER_SOFTWARE'] =~ /^Microsoft-IIS/i
+ @output_method = "nph"
+ else
+ @output_method = "ph"
+ end
+
+ unless settings.is_a?(Hash)
+ raise TypeError, "settings must be a Hash"
+ end
+ @settings = settings
+
+ unless @settings.has_key?("safe")
+ @settings["safe"] = 1
+ end
+
+ if $SAFE < @settings["safe"]
+ $SAFE = @settings["safe"]
+ end
+
+ unless @settings.has_key?("cache")
+ @settings["cache"] = false
+ end
+
+ # mod_ruby sets no QUERY_STRING variable, if no GET-Parameters are given
+ unless @env.has_key?("QUERY_STRING")
+ @env["QUERY_STRING"] = ""
+ end
+
+ # Now we split the QUERY_STRING by the seperators & and ; or, if
+ # specified, settings['get seperator']
+ unless @settings.has_key?("get seperator")
+ get_args = @env['QUERY_STRING'].split(/[&;]/)
+ else
+ get_args = @env['QUERY_STRING'].split(@settings['get seperator'])
+ end
+
+ get_args.each do | arg |
+ arg_key, arg_val = arg.split(/=/, 2)
+ arg_key = Rweb::unescape(arg_key)
+ arg_val = Rweb::unescape(arg_val)
+
+ # Parse names like name[0], name['text'] or name[]
+ pattern = /^(.+)\[("[^\]]*"|'[^\]]*'|[0-9]*)\]$/
+ keys = []
+ while match = pattern.match(arg_key)
+ arg_key = match[1]
+ keys = [match[2]] + keys
+ end
+ keys = [arg_key] + keys
+
+ akt = @get
+ last = nil
+ lastkey = nil
+ keys.each do |key|
+ if key == ""
+ # No key specified (like in "test[]"), so we use the
+ # lowerst unused Integer as key
+ key = 0
+ while akt.has_key?(key)
+ key += 1
+ end
+ elsif /^[0-9]*$/ =~ key
+ # If the index is numerical convert it to an Integer
+ key = key.to_i
+ elsif key[0].chr == "'" || key[0].chr == '"'
+ key = key[1, key.length() -2]
+ end
+ if !akt.has_key?(key) || !akt[key].class == Hash
+ # create an empty Hash if there isn't already one
+ akt[key] = {}
+ end
+ last = akt
+ lastkey = key
+ akt = akt[key]
+ end
+ last[lastkey] = arg_val
+ end
+
+ if @env['REQUEST_METHOD'] == "POST"
+ if @env.has_key?("CONTENT_TYPE") && @env['CONTENT_TYPE'] == "application/x-www-form-urlencoded" && @env.has_key?('CONTENT_LENGTH')
+ unless @settings.has_key?("post seperator")
+ post_args = $stdin.read(@env['CONTENT_LENGTH'].to_i).split(/[&;]/)
+ else
+ post_args = $stdin.read(@env['CONTENT_LENGTH'].to_i).split(@settings['post seperator'])
+ end
+ post_args.each do | arg |
+ arg_key, arg_val = arg.split(/=/, 2)
+ arg_key = Rweb::unescape(arg_key)
+ arg_val = Rweb::unescape(arg_val)
+
+ # Parse names like name[0], name['text'] or name[]
+ pattern = /^(.+)\[("[^\]]*"|'[^\]]*'|[0-9]*)\]$/
+ keys = []
+ while match = pattern.match(arg_key)
+ arg_key = match[1]
+ keys = [match[2]] + keys
+ end
+ keys = [arg_key] + keys
+
+ akt = @post
+ last = nil
+ lastkey = nil
+ keys.each do |key|
+ if key == ""
+ # No key specified (like in "test[]"), so we use
+ # the lowerst unused Integer as key
+ key = 0
+ while akt.has_key?(key)
+ key += 1
+ end
+ elsif /^[0-9]*$/ =~ key
+ # If the index is numerical convert it to an Integer
+ key = key.to_i
+ elsif key[0].chr == "'" || key[0].chr == '"'
+ key = key[1, key.length() -2]
+ end
+ if !akt.has_key?(key) || !akt[key].class == Hash
+ # create an empty Hash if there isn't already one
+ akt[key] = {}
+ end
+ last = akt
+ lastkey = key
+ akt = akt[key]
+ end
+ last[lastkey] = arg_val
+ end
+ else
+ # Maybe we should print a warning here?
+ $stderr.print("Unidentified form data recived and discarded.")
+ end
+ end
+
+ if @env.has_key?("HTTP_COOKIE")
+ cookie = @env['HTTP_COOKIE'].split(/; ?/)
+ cookie.each do | c |
+ cookie_key, cookie_val = c.split(/=/, 2)
+
+ @cookies [Rweb::unescape(cookie_key)] = Rweb::unescape(cookie_val)
+ end
+ end
+
+ if defined?(@env['HTTP_USER_AGENT'])
+ @user_agent = @env['HTTP_USER_AGENT']
+ else
+ @user_agent = nil;
+ end
+
+ if defined?(@env['REMOTE_ADDR'])
+ @remote_addr = @env['REMOTE_ADDR']
+ else
+ @remote_addr = nil
+ end
+ # }}}
+ end
+
+ # Prints a String to the client. If caching is enabled, the String will be
+ # buffered until the out block ends.
+ def print(str = "")
+ # {{{
+ unless @output_allowed
+ raise "You just can write to output inside of a Rweb::out-block"
+ end
+
+ if @settings["cache"]
+ @buffer += [str.to_s]
+ else
+ unless @output_started
+ sendHeaders
+ end
+ $stdout.print(str)
+ end
+ nil
+ # }}}
+ end
+
+ # Prints a String to the client and adds a line break at the end. Please
+ # remember, that a line break is not visible in HTML, use the <br> HTML-Tag
+ # for this. If caching is enabled, the String will buffered until the end
+ # of the out block ends.
+ def puts(str = "")
+ # {{{
+ self.print(str + "\n")
+ # }}}
+ end
+
+ # Alias to print.
+ def write(str = "")
+ # {{{
+ self.print(str)
+ # }}}
+ end
+
+ # If caching is enabled, all cached data are sent to the client and the
+ # cache is emptied.
+ def flush
+ # {{{
+ unless @output_allowed
+ raise "You can't use flush outside of a Rweb::out-block"
+ end
+ buffer = @buffer.join
+
+ unless @output_started
+ sendHeaders
+ end
+ $stdout.print(buffer)
+
+ @buffer = []
+ # }}}
+ end
+
+ # Sends one or more header to the client. All headers are cached just
+ # before body data are send to the client. If the same header are set
+ # twice, only the last value is send.
+ #
+ # Example:
+ # web.header("Last-Modified: Mon, 16 Feb 2004 20:15:41 GMT")
+ # web.header("Location: http://www.ruby-lang.org")
+ #
+ # You can specify more than one header at the time by doing something like
+ # this:
+ # web.header("Content-Type: text/plain\nContent-Length: 383")
+ # or
+ # web.header(["Content-Type: text/plain", "Content-Length: 383"])
+ def header(str)
+ # {{{
+ if @output_started
+ raise "HTTP-Headers are already send. You can't change them after output has started!"
+ end
+ unless @output_allowed
+ raise "You just can set headers inside of a Rweb::out-block"
+ end
+ if str.is_a?Array
+ str.each do | value |
+ self.header(value)
+ end
+
+ elsif str.split(/\n/).length > 1
+ str.split(/\n/).each do | value |
+ self.header(value)
+ end
+
+ elsif str.is_a? String
+ str.gsub!(/\r/, "")
+
+ if (str =~ /^HTTP\/1\.[01] [0-9]{3} ?.*$/) == 0
+ pattern = /^HTTP\/1.[01] ([0-9]{3}) ?(.*)$/
+
+ result = pattern.match(str)
+ self.setstatus(result[0], result[1])
+ elsif (str =~ /^status: [0-9]{3} ?.*$/i) == 0
+ pattern = /^status: ([0-9]{3}) ?(.*)$/i
+
+ result = pattern.match(str)
+ self.setstatus(result[0], result[1])
+ else
+ a = str.split(/: ?/, 2)
+
+ @header[a[0].downcase] = a[1]
+ end
+ end
+ # }}}
+ end
+
+ # Changes the status of this page. There are several codes like "200 OK",
+ # "302 Found", "404 Not Found" or "500 Internal Server Error". A list of
+ # all codes is available at
+ # http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10
+ #
+ # You can just send the code number, the reason phrase will be added
+ # automaticly with the recommendations from the w3c if not specified. If
+ # you set the status twice or more, only the last status will be send.
+ # Examples:
+ # web.status("401 Unauthorized")
+ # web.status("410 Sad but true, this lonely page is gone :(")
+ # web.status(206)
+ # web.status("400")
+ #
+ # The default status is "200 OK". If a "Location" header is set, the
+ # default status is "302 Found".
+ def status(str)
+ # {{{
+ if @output_started
+ raise "HTTP-Headers are already send. You can't change them after output has started!"
+ end
+ unless @output_allowed
+ raise "You just can set headers inside of a Rweb::out-block"
+ end
+ if str.is_a?Integer
+ @status = str
+ elsif str.is_a?String
+ p1 = /^([0-9]{3}) ?(.*)$/
+ p2 = /^HTTP\/1\.[01] ([0-9]{3}) ?(.*)$/
+ p3 = /^status: ([0-9]{3}) ?(.*)$/i
+
+ if (a = p1.match(str)) == nil
+ if (a = p2.match(str)) == nil
+ if (a = p3.match(str)) == nil
+ raise ArgumentError, "Invalid argument", caller
+ end
+ end
+ end
+ @status = a[1].to_i
+ if a[2] != ""
+ @reasonPhrase = a[2]
+ else
+ @reasonPhrase = getReasonPhrase(@status)
+ end
+ else
+ raise ArgumentError, "Argument of setstatus must be integer or string", caller
+ end
+ # }}}
+ end
+
+ # Handles the output of your content and rescues all exceptions. Send all
+ # data in the block to this method. For example:
+ # web.out do
+ # web.header("Content-Type: text/plain")
+ # web.puts("Hello, plain world!")
+ # end
+ def out
+ # {{{
+ # Runs the given block with output enabled, turns any exception into an
+ # error response and finally sends whatever is still pending (headers and,
+ # with caching enabled, the buffered body).
+ @output_allowed = true
+ @buffer = []; # We use an array as buffer, because it's more performant :)
+
+ begin
+ yield
+ # Exception (not only StandardError) is rescued on purpose: every failure
+ # of the page code is logged to $stderr and reported below.
+ rescue Exception => exception
+ $stderr.puts "Ruby exception rescued (#{exception.class}): #{exception.message}"
+ $stderr.puts exception.backtrace.join("\n")
+
+ # As long as nothing has been sent, the response can still become a clean
+ # 500: drop all headers set so far.
+ # NOTE(review): setstatus is not defined in the visible part of this class
+ # (the visible method is called status) - confirm that it exists.
+ unless @output_started
+ self.setstatus(500)
+ @header = {}
+ end
+
+ # The error details are shown to the client unless "hide errors" is true.
+ unless (@settings.has_key?("hide errors") and @settings["hide errors"] == true)
+ unless @output_started
+ self.header("Content-Type: text/html")
+ self.puts "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Strict//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">"
+ self.puts "<html>"
+ self.puts "<head>"
+ self.puts "<title>500 Internal Server Error</title>"
+ self.puts "</head>"
+ self.puts "<body>"
+ end
+ # HTML report for HTML pages, plain text for every other content type.
+ # NOTE(review): class, message and backtrace are written into the HTML
+ # without escaping.
+ if @header.has_key?("content-type") and (@header["content-type"] =~ /^text\/html/i) == 0
+ self.puts "<h1>Internal Server Error</h1>"
+ self.puts "<p>The server encountered an exception and was unable to complete your request.</p>"
+ self.puts "<p>The exception has provided the following information:</p>"
+ self.puts "<pre style=\"background: #FFCCCC; border: black solid 2px; margin-left: 2cm; margin-right: 2cm; padding: 2mm;\"><b>#{exception.class}</b>: #{exception.message} <b>on</b>"
+ self.puts
+ self.puts "#{exception.backtrace.join("\n")}</pre>"
+ self.puts "</body>"
+ self.puts "</html>"
+ else
+ self.puts "The server encountered an exception and was unable to complete your request"
+ self.puts "The exception has provided the following information:"
+ self.puts "#{exception.class}: #{exception.message}"
+ self.puts
+ self.puts exception.backtrace.join("\n")
+ end
+ end
+ end
+
+ # With caching the complete body is known here, so a content-length header
+ # is added (unless one was set) before headers and body are sent.
+ # Without caching only the headers may still be missing.
+ if @settings["cache"]
+ buffer = @buffer.join
+
+ unless @output_started
+ unless @header.has_key?("content-length")
+ self.header("content-length: #{buffer.length}")
+ end
+
+ sendHeaders
+ end
+ $stdout.print(buffer)
+ elsif !@output_started
+ sendHeaders
+ end
+ @output_allowed = false;
+ # }}}
+ end
+
+ # Decodes URL encoded data, %20 for example stands for a space.
+ def Rweb.unescape(str)
+ # {{{
+ if defined? str and str.is_a? String
+ str.gsub!(/\+/, " ")
+ str.gsub(/%.{2}/) do | s |
+ s[1,2].hex.chr
+ end
+ end
+ # }}}
+ end
+
+ protected
+ def sendHeaders
+ # {{{
+
+ Cookie.disallow # no more cookies can be set or modified
+ if !(@settings.has_key?("silent") and @settings["silent"] == true) and !@header.has_key?("x-powered-by")
+ if @mod_ruby
+ header("x-powered-by: #{RWEB} (Ruby/#{RUBY_VERSION}, #{MOD_RUBY})");
+ else
+ header("x-powered-by: #{RWEB} (Ruby/#{RUBY_VERSION})");
+ end
+ end
+
+ if @output_method == "ph"
+ if ((@status == nil or @status == 200) and !@header.has_key?("content-type") and !@header.has_key?("location"))
+ header("content-type: text/html")
+ end
+
+ if @status != nil
+ $stdout.print "Status: #{@status} #{@reasonPhrase}\r\n"
+ end
+
+ @header.each do |key, value|
+ key = key *1 # "unfreeze" key :)
+ key[0] = key[0,1].upcase![0]
+
+ key = key.gsub(/-[a-z]/) do |char|
+ "-" + char[1,1].upcase
+ end
+
+ $stdout.print "#{key}: #{value}\r\n"
+ end
+ cookies = Cookie.getHttpHeader # Get all cookies as an HTTP Header
+ if cookies
+ $stdout.print cookies
+ end
+
+ $stdout.print "\r\n"
+
+ elsif @output_method == "nph"
+ elsif @output_method == "mod_ruby"
+ r = Apache.request
+
+ if ((@status == nil or @status == 200) and !@header.has_key?("content-type") and !@header.has_key?("location"))
+ header("text/html")
+ end
+
+ if @status != nil
+ r.status_line = "#{@status} #{@reasonPhrase}"
+ end
+
+ r.send_http_header
+ @header.each do |key, value|
+ key = key *1 # "unfreeze" key :)
+
+ key[0] = key[0,1].upcase![0]
+ key = key.gsub(/-[a-z]/) do |char|
+ "-" + char[1,1].upcase
+ end
+ puts "#{key}: #{value.class}"
+ #r.headers_out[key] = value
+ end
+ end
+ @output_started = true
+ # }}}
+ end
+
+ def getReasonPhrase (status)
+ # {{{
+ if status == 100
+ "Continue"
+ elsif status == 101
+ "Switching Protocols"
+ elsif status == 200
+ "OK"
+ elsif status == 201
+ "Created"
+ elsif status == 202
+ "Accepted"
+ elsif status == 203
+ "Non-Authoritative Information"
+ elsif status == 204
+ "No Content"
+ elsif status == 205
+ "Reset Content"
+ elsif status == 206
+ "Partial Content"
+ elsif status == 300
+ "Multiple Choices"
+ elsif status == 301
+ "Moved Permanently"
+ elsif status == 302
+ "Found"
+ elsif status == 303
+ "See Other"
+ elsif status == 304
+ "Not Modified"
+ elsif status == 305
+ "Use Proxy"
+ elsif status == 307
+ "Temporary Redirect"
+ elsif status == 400
+ "Bad Request"
+ elsif status == 401
+ "Unauthorized"
+ elsif status == 402
+ "Payment Required"
+ elsif status == 403
+ "Forbidden"
+ elsif status == 404
+ "Not Found"
+ elsif status == 405
+ "Method Not Allowed"
+ elsif status == 406
+ "Not Acceptable"
+ elsif status == 407
+ "Proxy Authentication Required"
+ elsif status == 408
+ "Request Time-out"
+ elsif status == 409
+ "Conflict"
+ elsif status == 410
+ "Gone"
+ elsif status == 411
+ "Length Required"
+ elsif status == 412
+ "Precondition Failed"
+ elsif status == 413
+ "Request Entity Too Large"
+ elsif status == 414
+ "Request-URI Too Large"
+ elsif status == 415
+ "Unsupported Media Type"
+ elsif status == 416
+ "Requested range not satisfiable"
+ elsif status == 417
+ "Expectation Failed"
+ elsif status == 500
+ "Internal Server Error"
+ elsif status == 501
+ "Not Implemented"
+ elsif status == 502
+ "Bad Gateway"
+ elsif status == 503
+ "Service Unavailable"
+ elsif status == 504
+ "Gateway Time-out"
+ elsif status == 505
+ "HTTP Version not supported"
+ else
+ raise "Unknown Statuscode. See http://www.w3.org/Protocols/rfc2616/rfc2616-sec6.html#sec6.1 for more information."
+ end
+ # }}}
+ end
+end
+
+class Cookie
+ attr_reader :name, :value, :maxage, :path, :domain, :secure, :comment
+
+ # Sets a cookie. Please see below for details of the attributes.
+ def initialize (name, value = nil, maxage = nil, path = nil, domain = nil, secure = false)
+ # {{{
+ # HTTP headers (Cookies are a HTTP header) can only set, while no content
+ # is send. So an exception will be raised, when @@allowed is set to false
+ # and a new cookie has set.
+ unless defined?(@@allowed)
+ @@allowed = true
+ end
+ unless @@allowed
+ raise "You can't set cookies after the HTTP headers are send."
+ end
+
+ unless defined?(@@list)
+ @@list = []
+ end
+ @@list += [self]
+
+ unless defined?(@@type)
+ @@type = "netscape"
+ end
+
+ unless name.class == String
+ raise TypeError, "The name of a cookie must be a string", caller
+ end
+ if value.class.superclass == Integer || value.class == Float
+ value = value.to_s
+ elsif value.class != String && value != nil
+ raise TypeError, "The value of a cookie must be a string, integer, float or nil", caller
+ end
+ if maxage.class == Time
+ maxage = maxage - Time.now
+ elsif !maxage.class.superclass == Integer || !maxage == nil
+ raise TypeError, "The maxage date of a cookie must be an Integer or Time object or nil.", caller
+ end
+ unless path.class == String || path == nil
+ raise TypeError, "The path of a cookie must be nil or a string", caller
+ end
+ unless domain.class == String || domain == nil
+ raise TypeError, "The value of a cookie must be nil or a string", caller
+ end
+ unless secure == true || secure == false
+ raise TypeError, "The secure field of a cookie must be true or false", caller
+ end
+
+ @name, @value, @maxage, @path, @domain, @secure = name, value, maxage, path, domain, secure
+ @comment = nil
+ # }}}
+ end
+
+ # Modifies the value of this cookie. The information you want to store. If the
+ # value is nil, the cookie will be deleted by the client.
+ #
+ # This attribute can be a String, Integer or Float object or nil.
+ def value=(value)
+ # {{{
+ if value.class.superclass == Integer || value.class == Float
+ value = value.to_s
+ elsif value.class != String && value != nil
+ raise TypeError, "The value of a cookie must be a string, integer, float or nil", caller
+ end
+ @value = value
+ # }}}
+ end
+
+ # Modifies the maxage of this cookie. This attribute defines the lifetime of
+ # the cookie, in seconds. A value of 0 means the cookie should be discarded
+ # imediatly. If it set to nil, the cookie will be deleted when the browser
+ # will be closed.
+ #
+ # Attention: This is different from other implementations like PHP, where you
+ # gives the seconds since 1/1/1970 0:00:00 GMT.
+ #
+ # This attribute must be an Integer or Time object or nil.
+ def maxage=(maxage)
+ # {{{
+ if maxage.class == Time
+ maxage = maxage - Time.now
+ elsif maxage.class.superclass == Integer || !maxage == nil
+ raise TypeError, "The maxage of a cookie must be an Interger or Time object or nil.", caller
+ end
+ @maxage = maxage
+ # }}}
+ end
+
+ # Modifies the path value of this cookie. The client will send this cookie
+ # only, if the requested document is this directory or a subdirectory of it.
+ #
+ # The value of the attribute must be a String object or nil.
+ def path=(path)
+ # {{{
+ unless path.class == String || path == nil
+ raise TypeError, "The path of a cookie must be nil or a string", caller
+ end
+ @path = path
+ # }}}
+ end
+
+ # Modifies the domain value of this cookie. The client will send this cookie
+ # only if it's connected with this domain (or a subdomain, if the first
+ # character is a dot like in ".ruby-lang.org")
+ #
+ # The value of this attribute must be a String or nil.
+ def domain=(domain)
+ # {{{
+ unless domain.class == String || domain == nil
+ raise TypeError, "The domain of a cookie must be a String or nil.", caller
+ end
+ @domain = domain
+ # }}}
+ end
+
+ # Modifies the secure flag of this cookie. If it's true, the client will only
+ # send this cookie if it is secured connected with us.
+ #
+ # The value od this attribute has to be true or false.
+ def secure=(secure)
+ # {{{
+ unless secure == true || secure == false
+ raise TypeError, "The secure field of a cookie must be true or false", caller
+ end
+ @secure = secure
+ # }}}
+ end
+
+ # Modifies the comment value of this cookie. The comment won't be send, if
+ # type is "netscape".
+ def comment=(comment)
+ # {{{
+ unless comment.class == String || comment == nil
+ raise TypeError, "The comment of a cookie must be a string or nil", caller
+ end
+ @comment = comment
+ # }}}
+ end
+
+ # Changes the type of all cookies.
+ # Allowed values are RFC2109 and netscape (default).
+ def Cookie.type=(type)
+ # {{{
+ unless @@allowed
+ raise "The cookies are allready send, so you can't change the type anymore."
+ end
+ unless type.downcase == "rfc2109" && type.downcase == "netscape"
+ raise "The type of the cookies must be \"RFC2109\" or \"netscape\"."
+ end
+ @@type = type;
+ # }}}
+ end
+
+ # After sending this message, no cookies can be set or modified. Use it, when
+ # HTTP-Headers are send. Rweb does this for you.
+ def Cookie.disallow
+ # {{{
+ @@allowed = false
+ true
+ # }}}
+ end
+
+ # Returns a HTTP header (type String) with all cookies. Rweb does this for
+ # you.
+ def Cookie.getHttpHeader
+ # {{{
+ if defined?(@@list)
+ if @@type == "netscape"
+ str = ""
+ @@list.each do |cookie|
+ if cookie.value == nil
+ cookie.maxage = 0
+ cookie.value = ""
+ end
+ # TODO: Name and value should be escaped!
+ str += "Set-Cookie: #{cookie.name}=#{cookie.value}"
+ unless cookie.maxage == nil
+ expire = Time.now + cookie.maxage
+ expire.gmtime
+ str += "; Expire=#{expire.strftime("%a, %d-%b-%Y %H:%M:%S %Z")}"
+ end
+ unless cookie.domain == nil
+ str += "; Domain=#{cookie.domain}"
+ end
+ unless cookie.path == nil
+ str += "; Path=#{cookie.path}"
+ end
+ if cookie.secure
+ str += "; Secure"
+ end
+ str += "\r\n"
+ end
+ return str
+ else # type == "RFC2109"
+ str = "Set-Cookie: "
+ comma = false;
+
+ @@list.each do |cookie|
+ if cookie.value == nil
+ cookie.maxage = 0
+ cookie.value = ""
+ end
+ if comma
+ str += ","
+ end
+ comma = true
+
+ str += "#{cookie.name}=\"#{cookie.value}\""
+ unless cookie.maxage == nil
+ str += "; Max-Age=\"#{cookie.maxage}\""
+ end
+ unless cookie.domain == nil
+ str += "; Domain=\"#{cookie.domain}\""
+ end
+ unless cookie.path == nil
+ str += "; Path=\"#{cookie.path}\""
+ end
+ if cookie.secure
+ str += "; Secure"
+ end
+ unless cookie.comment == nil
+ str += "; Comment=\"#{cookie.comment}\""
+ end
+ str += "; Version=\"1\""
+ end
+ str
+ end
+ else
+ false
+ end
+ # }}}
+ end
+end
+
+require 'strscan'
+
+module BBCode
+ DEBUG = true
+
+ use 'encoder', 'tags', 'tagstack', 'smileys'
+
+=begin
+ The Parser class takes care of the encoding.
+ It scans the given BBCode (as plain text), finds tags
+ and smilies and also makes links of urls in text.
+
+ Normal text is send directly to the encoder.
+
+ If a tag was found, an instance of a Tag subclass is created
+ to handle the case.
+
+ The @tagstack manages tag nesting and ensures valid HTML.
+=end
+
+ class Parser
+ class Attribute
+ # flatten and use only one empty_arg
+ def self.create attr
+ attr = flatten attr
+ return @@empty_attr if attr.empty?
+ new attr
+ end
+
+ private_class_method :new
+
+ # remove leading and trailing whitespace; concat lines
+ def self.flatten attr
+ attr.strip.gsub(/\n/, ' ')
+ # -> ^ and $ can only match at begin and end now
+ end
+
+ ATTRIBUTE_SCAN = /
+ (?!$) # don't match at end
+ \s*
+ ( # $1 = key
+ [^=\s\]"\\]*
+ (?:
+ (?: \\. | "[^"\\]*(?:\\.[^"\\]*)*"? )
+ [^=\s\]"\\]*
+ )*
+ )
+ (?:
+ =
+ ( # $2 = value
+ [^\s\]"\\]*
+ (?:
+ (?: \\. | "[^"\\]*(?:\\.[^"\\]*)*"? )
+ [^\s\]"\\]*
+ )*
+ )?
+ )?
+ \s*
+ /x
+
+ def self.parse source
+ source = source.dup
+ # empty_tag: the tag looks like [... /]
+ # slice!: this deletes the \s*/] at the end
+ # \s+ because [url=http://rubybb.org/forum/] is NOT an empty tag.
+ # In RubyBBCode, you can use [url=http://rubybb.org/forum/ /], and this has to be
+ # interpreted correctly.
+ empty_tag = source.sub!(/^:/, '=') or source.slice!(/\/$/)
+ debug 'PARSE: ' + source.inspect + ' => ' + empty_tag.inspect
+ #-> we have now an attr that's EITHER empty OR begins and ends with non-whitespace.
+
+ attr = Hash.new
+ attr[:flags] = []
+ source.scan(ATTRIBUTE_SCAN) { |key, value|
+ if not value
+ attr[:flags] << unescape(key)
+ else
+ next if value.empty? and key.empty?
+ attr[unescape(key)] = unescape(value)
+ end
+ }
+ debug attr.inspect
+
+ return empty_tag, attr
+ end
+
+ def self.unescape_char esc
+ esc[1]
+ end
+
+ def self.unquote qt
+ qt[1..-1].chomp('"').gsub(/\\./) { |esc| unescape_char esc }
+ end
+
+ def self.unescape str
+ str.gsub(/ (\\.) | (" [^"\\]* (?:\\.[^"\\]*)* "?) /x) {
+ if $1
+ unescape_char $1
+ else
+ unquote $2
+ end
+ }
+ end
+
+ include Enumerable
+ def each &block
+ @args.each(&block)
+ end
+
+ attr_reader :source, :args, :value
+
+ def initialize source
+ @source = source
+ debug 'Attribute#new(%p)' % source
+ @empty_tag, @attr = Attribute.parse source
+ @value = @attr[''].to_s
+ end
+
+ def empty?
+ self == @@empty_attr
+ end
+
+ def empty_tag?
+ @empty_tag
+ end
+
+ def [] *keys
+ res = @attr[*keys]
+ end
+
+ def flags
+ attr[:flags]
+ end
+
+ def to_s
+ @attr
+ end
+
+ def inspect
+ 'ATTR[' + @attr.inspect + (@empty_tag ? ' | empty tag' : '') + ']'
+ end
+ end
+ # Reopened after the class is complete: the one shared instance returned by
+ # Attribute.create for tags without attributes. new is private, so it is
+ # called here from inside the class body.
+ class Attribute
+ @@empty_attr = new ''
+ end
+ end
+
+ class Parser
+ def Parser.flatten str
+ # replace mac & dos newlines with unix style
+ str.gsub(/\r\n?/, "\n")
+ end
+
+ def initialize input = ''
+ # input manager
+ @scanner = StringScanner.new ''
+ # output manager
+ @encoder = Encoder.new
+ @output = ''
+ # tag manager
+ @tagstack = TagStack.new(@encoder)
+
+ @do_magic = true
+ # set the input
+ feed input
+ end
+
+ # if you want, you can feed a parser instance after creating,
+ # or even feed it repeatedly.
+ def feed food
+ @scanner.string = Parser.flatten food
+ end
+
+ # parse through the string using parse_token
+ def parse
+ parse_token until @scanner.eos?
+ @tagstack.close_all
+ @output = parse_magic @encoder.output
+ end
+
+ def output
+ @output
+ end
+
+ # ok, internals start here
+ private
+ # the default output functions. everything should use them or the tags.
+ def add_text text = @scanner.matched
+ @encoder.add_text text
+ end
+
+ # use this carefully
+ def add_html html
+ @encoder.add_html html
+ end
+
+ # highlights the text as error
+ def add_garbage garbage
+ add_html '<span class="error">' if DEBUG
+ add_text garbage
+ add_html '</span>' if DEBUG
+ end
+
+ # unknown and incorrectly nested tags are ignored and
+ # sent as plaintext (garbage in - garbage out).
+ # in debug mode, garbage is marked with lime background.
+ def garbage_out start
+ @scanner.pos = start
+ garbage = @scanner.scan(/./m)
+ debug 'GARBAGE: ' + garbage
+ add_garbage garbage
+ end
+
+ # simple text; everything but [, \[ allowed
+ # Escape-aware variant: a backslash consumes the following character, so
+ # "\[" counts as text. It is not referenced in the visible part of the file.
+ SIMPLE_TEXT_SCAN_ = /
+ [^\[\\]* # normal*
+ (?: # (
+ \\.? # special
+ [^\[\\]* # normal*
+ )* # )*
+ /mx
+ # The pattern parse_token uses: one or more characters up to the next "[".
+ SIMPLE_TEXT_SCAN = /[^\[]+/
+
+=begin
+
+ WHAT IS A TAG?
+ ==============
+
+ Tags in BBCode can be much more than just a simple [b].
+ I use many terms here to differ the parts of each tag.
+
+ Basic scheme:
+ [ code ]
+ TAG START TAG INFO TAG END
+
+ Most tags need a second tag to close the range it opened.
+ This is done with CLOSING TAGS:
+ [/code]
+ or by using empty tags that have no content and close themselves:
+ [url=winamp.com /]
+ You surely know this from HTML.
+ These slashes define the TAG KIND = normal|closing|empty and
+ cannot be used together.
+
+ Everything between [ and ], excluding the slashes, is called the
+ TAG INFO. This info may contain:
+ - TAG ID
+ - TAG NAME including the tag id
+ - attributes
+
+ The TAG ID is the first char of the info:
+
+ TAG | ID
+ ----------+----
+ [quote] | q
+ [&plusmn;] | &
+ ["[b]"] | "
+ [/url] | u
+ [---] | -
+
+ As you can see, the tag id shows the TAG TYPE, it can be a
+ normal tag, a formatting tag or an entity.
+ Therefore, the parser first scans the id to decide how to go
+ on with parsing.
+=end
+ # tag
+ # TODO more complex expression allowing
+ # [quote="[ladico]"] and [quote=\[ladico\]] to be correct tags
+ # Start of any tag: "[", an optional "/" and exactly one character, the tag id.
+ TAG_BEGIN_SCAN = /
+ \[ # tag start
+ ( \/ )? # $1 = closing tag?
+ ( [^\]] ) # $2 = tag id
+ /x
+ # Skips what is left of a handled tag, including the "]" if there is one.
+ TAG_END_SCAN = /
+ [^\]]* # rest that was not handled
+ \]? # tag end
+ /x
+ # CLOSE_TAG_SCAN and UNCLOSED_TAG_SCAN are not referenced in the visible
+ # part of the file.
+ CLOSE_TAG_SCAN = /
+ ( [^\]]* ) # $1 = the rest of the tag info
+ ( \/ )? # $2 = empty tag?
+ \]? # tag end
+ /x
+ UNCLOSED_TAG_SCAN = / \[ /x
+
+ # Remaining letters of a tag name (the first letter is the tag id).
+ CLASSIC_TAG_SCAN = / [a-z]* /ix
+
+ # Further asterisks of a [*], [**], ... tag.
+ SEPARATOR_TAG_SCAN = / \** /x
+
+ # At least two more dashes after the id: [---] and longer.
+ FORMAT_TAG_SCAN = / -- -* /x
+
+ # Contents of a ["..."] tag up to the closing quote; backslash escapes.
+ QUOTED_SCAN = /
+ ( # $1 = quoted text
+ [^"\\]* # normal*
+ (?: # (
+ \\. # special
+ [^"\\]* # normal*
+ )* # )*
+ )
+ "? # end quote "
+ /mx
+
+ # Entity code of a [&...] tag.
+ ENTITY_SCAN = /
+ ( [^;\]]+ ) # $1 = entity code
+ ;? # optional ending semicolon
+ /ix
+
+ SMILEY_SCAN = Smileys::SMILEY_PATTERN
+
+ # this is the main parser loop that separates
+ # text - everything until "["
+ # from
+ # tags - starting with "[", ending with "]"
+ def parse_token
+ if @scanner.scan(SIMPLE_TEXT_SCAN)
+ add_text
+ else
+ handle_tag
+ end
+ end
+
+ def handle_tag
+ # Handles one tag at the current scanner position. The tag id (first
+ # character after "[" or "[/") selects the kind of tag; whatever cannot be
+ # handled is given to garbage_out.
+ tag_start = @scanner.pos
+
+ unless @scanner.scan TAG_BEGIN_SCAN
+ garbage_out tag_start
+ return
+ end
+
+ closing, id = @scanner[1], @scanner[2]
+ #debug 'handle_tag(%p)' % @scanner.matched
+
+ # handled is the value of the matching branch; it stays nil when the id
+ # matches no branch or the inner scan fails.
+ handled =
+ case id
+
+ # classic tags like [b] or [/url]; handle_classic_tag scans the attributes
+ # with ATTRIBUTES_SCAN, which also takes an optional "]", so the
+ # TAG_END_SCAN below is skipped for them
+ when /[a-z]/i
+ if @scanner.scan(CLASSIC_TAG_SCAN)
+ if handle_classic_tag(id + @scanner.matched, closing)
+ already_closed = true
+ end
+ end
+
+ when '*'
+ if @scanner.scan(SEPARATOR_TAG_SCAN)
+ handle_asterisk tag_start, id + @scanner.matched
+ true
+ end
+
+ # [---] and longer becomes a horizontal rule
+ when '-'
+ if @scanner.scan(FORMAT_TAG_SCAN)
+ #format = id + @scanner.matched
+ @encoder.add_html "\n<hr>\n"
+ true
+ end
+
+ # ["..."] emits the quoted text literally
+ when '"'
+ if @scanner.scan(QUOTED_SCAN)
+ @encoder.add_text unescape(@scanner[1])
+ true
+ end
+
+ when '&'
+ if @scanner.scan(ENTITY_SCAN)
+ @encoder.add_entity @scanner[1]
+ true
+ end
+
+ # the id itself belongs to the smiley, so it is put back first
+ when Smileys::SMILEY_START_CHARSET
+ @scanner.pos = @scanner.pos - 1 # (ungetch)
+ if @scanner.scan(SMILEY_SCAN)
+ @encoder.add_html Smileys.smiley_to_image(@scanner.matched)
+ true
+ end
+
+ end # case
+
+ return garbage_out(tag_start) unless handled
+
+ # skip the unhandled rest of the tag up to and including "]"
+ @scanner.scan(TAG_END_SCAN) unless already_closed
+ end
+
+ ATTRIBUTES_SCAN = /
+ (
+ [^\]"\\]*
+ (?:
+ (?:
+ \\.
+ |
+ "
+ [^"\\]*
+ (?:
+ \\.
+ [^"\\]*
+ )*
+ "?
+ )
+ [^\]"\\]*
+ )*
+ )
+ \]?
+ /x
+
+ def handle_classic_tag name, closing
+ # Opens or closes the tag called name (closing is set for "[/name]").
+ # Returns true if the tag was handled and nil if it is unknown or does not
+ # fit into the current nesting, in which case the caller treats it as
+ # garbage.
+ # Note: name is downcased in place.
+ debug 'TAG: ' + (closing ? '/' : '') + name
+ # flatten
+ name.downcase!
+ tag_class = TAG_LIST[name]
+ return unless tag_class
+
+ #debug((opening ? 'OPEN ' : 'CLOSE ') + tag_class.name)
+
+ # create an attribute object to handle it
+ @scanner.scan(ATTRIBUTES_SCAN)
+ #debug name + ':' + @scanner[1]
+ attr = Attribute.create @scanner[1]
+ #debug 'ATTRIBUTES %p ' % attr #unless attr.empty?
+
+ #debug 'closing: %p; name=%s, attr=%p' % [closing, name, attr]
+
+ # OPEN
+ if not closing and tag = @tagstack.try_open_class(tag_class, attr)
+ #debug 'opening'
+ tag.do_open @scanner
+ # this should be done by the tag itself.
+ # empty tags and tags with special content are closed again right away
+ if attr.empty_tag?
+ tag.handle_empty
+ @tagstack.close_tag
+ elsif tag.special_content?
+ handle_special_content(tag)
+ @tagstack.close_tag
+ # # ignore asterisks directly after the opening; these are phpBBCode
+ # elsif tag.respond_to? :asterisk
+ # debug 'SKIP ASTERISKS: ' if @scanner.skip(ASTERISK_TAGS_SCAN)
+ end
+
+ # CLOSE
+ # (also tried for an opening tag that could not be opened)
+ elsif @tagstack.try_close_class(tag_class)
+ #debug 'closing'
+ # GARBAGE
+ else
+ return
+ end
+
+ true
+ end
+
+ def handle_asterisk tag_start, stars
+ #debug 'ASTERISK: ' + stars.to_s
+ # rule for asterisk tags: they belong to the last tag
+ # that handles them. tags opened after this tag are closed.
+ # if no open tag uses them, all are closed.
+ tag = @tagstack.close_all_until { |tag| tag.respond_to? :asterisk }
+ unless tag and tag.asterisk stars, @scanner
+ garbage_out tag_start
+ end
+ end
+
+ def handle_special_content tag
+ scanned = @scanner.scan_until(tag.closing_tag)
+ if scanned
+ scanned.slice!(-(@scanner.matched.size)..-1)
+ else
+ scanned = @scanner.scan(/.*/m).to_s
+ end
+ #debug 'SPECIAL CONTENT: ' + scanned
+ tag.handle_content(scanned)
+ end
+
+ def unescape text
+ # input: correctly formatted quoted string (without the quotes)
+ text.gsub(/\\(?:(["\\])|.)/) { $1 or $& }
+ end
+
+
+ # MAGIC FEAUTURES
+
+ # Something that starts with "www." or "ftp.", or with a protocol of at
+ # least three word characters followed by "://", up to the next whitespace.
+ URL_PATTERN = /(?:(?:www|ftp)\.|(?>\w{3,}):\/\/)\S+/
+ EMAIL_PATTERN = /(?>[\w\-_.]+)@[\w\-\.]+\.\w+/
+
+ # HAS_MAGIC is not referenced in the visible part of the file.
+ HAS_MAGIC = /[&@#{Smileys::SMILEY_START_CHARS}]|(?i:www|ftp)/
+
+ # $1 = the non-word character (or line start) before the magic text,
+ # $2 = the magic text itself: a smiley, a URL or an e-mail address.
+ MAGIC_PATTERN = Regexp.new('(\W|^)(%s)' %
+ [Smileys::MAGIC_SMILEY_PATTERN, URL_PATTERN, EMAIL_PATTERN].map { |pattern|
+ pattern.to_s
+ }.join('|') )
+
+ # Used by to_magic to tell what kind of magic text MAGIC_PATTERN has found.
+ IS_SMILEY_PATTERN = Regexp.new('^%s' % Smileys::SMILEY_START_CHARSET.to_s )
+ IS_URL_PATTERN = /^(?:(?i:www|ftp)\.|(?>\w+):\/\/)/
+ URL_STARTS_WITH_PROTOCOL = /^\w+:\/\//
+ IS_EMAIL_PATTERN = /^[\w\-_.]+@/
+
+ def to_magic text
+ # Replaces smileys, URLs and e-mail addresses in text with HTML.
+ # text is changed in place and also returned.
+ # debug MAGIC_PATTERN.to_s
+ text.gsub!(MAGIC_PATTERN) {
+ magic = $2
+ # $1 (the character in front of the magic text) is kept
+ $1 + case magic
+ when IS_SMILEY_PATTERN
+ # NOTE(review): handle_tag calls Smileys.smiley_to_image, this calls
+ # smiley_to_img - confirm that both exist.
+ Smileys.smiley_to_img magic
+ when IS_URL_PATTERN
+ last = magic.slice_punctation! # no punctation in my URL
+ # href is the same String object as magic, so the insert below also
+ # changes the link text.
+ href = magic
+ href.insert(0, 'http://') unless magic =~ URL_STARTS_WITH_PROTOCOL
+ '<a href="' + href + '">' + magic + '</a>' + last
+ when IS_EMAIL_PATTERN
+ last = magic.slice_punctation!
+ '<a href="mailto:' + magic + '">' + magic + '</a>' + last
+ else
+ # MAGIC_PATTERN matched something none of the patterns above recognises
+ raise '{{{' + magic + '}}}'
+ end
+ }
+ text
+ end
+
+ # handles smileys and urls
+ def parse_magic html
+   # With magic switched off the HTML is returned untouched.
+   return html unless @do_magic
+
+   html_scanner = StringScanner.new html
+   result = ''
+   while html_scanner.rest?
+     # complete links, complete <pre> blocks and single tags are copied as
+     # they are; only the text between them goes through to_magic
+     if html_scanner.scan(/ < (?: a\s .*? <\/a> | pre\W .*? <\/pre> | [^>]* > ) /mx)
+       result << html_scanner.matched
+     elsif html_scanner.scan(/ [^<]+ /x)
+       result << to_magic(html_scanner.matched)
+     elsif html_scanner.scan(/./m)
+       # this should never happen
+       raise 'ERROR: else case reached'
+     end
+   end
+   result
+ end
+ end # Parser
+end
+
+class String
+ def slice_punctation!
+ slice!(/[.:,!\?]+$/).to_s # return '' instead of nil
+ end
+end
+
+#
+# = Grammar
+#
+# An implementation of common algorithms on grammars.
+#
+# This is used by Shinobu, a visualization tool for educating compiler-building.
+#
+# Thanks to Andreas Kunert for his wonderful LR(k) Pamphlet (German, see http://www.informatik.hu-berlin.de/~kunert/papers/lr-analyse), and Aho/Sethi/Ullman for their Dragon Book.
+#
+# Homepage:: http://shinobu.cYcnus.de (not existing yet)
+# Author:: murphy (Kornelius Kalnbach)
+# Copyright:: (cc) 2005 cYcnus
+# License:: GPL
+# Version:: 0.2.0 (2005-03-27)
+
+require 'set_hash'
+require 'ctype'
+require 'tools'
+require 'rules'
+require 'trace'
+
+require 'first'
+require 'follow'
+
+# = Grammar
+#
+# == Syntax
+#
+# === Rules
+#
+# Each line is a rule.
+# The syntax is
+#
+# left - right
+#
+# where +left+ and +right+ can be uppercase and lowercase letters,
+# and <code>-</code> can be any combination of <, >, - or whitespace.
+#
+# === Symbols
+#
+# Uppercase letters stand for meta symbols, lowercase for terminals.
+#
+# You can make epsilon-derivations by leaving <code><right></code> empty.
+#
+# === Example
+# S - Ac
+# A - Sc
+# A - b
+# A -
class Grammar

  # Base class for LL(1) violations. It derives from StandardError (not
  # Exception) so that an ordinary +rescue+ is able to handle it.
  LLError = Class.new(StandardError)
  # Two alternatives of one meta symbol have overlapping FIRST sets.
  LLErrorType1 = Class.new(LLError)
  # A meta symbol can derive epsilon and its FIRST and FOLLOW sets overlap.
  LLErrorType2 = Class.new(LLError)

  attr_reader :tracer
  attr_reader :meta_symbols, :terminals, :rules, :start_symbol

  alias_method :sigma, :terminals
  alias_method :alphabet, :terminals
  alias_method :variables, :meta_symbols
  alias_method :nonterminals, :meta_symbols

  # Creates a new Grammar from +data+ (see #add_rules for the syntax).
  # +tracer+ is stored and exposed through #tracer; it defaults to a
  # fresh Tracer.
  def initialize(data, tracer = Tracer.new)
    @tracer = tracer
    @rules = Rules.new
    @terminals, @meta_symbols = SortedSet.new, Array.new
    @start_symbol = nil
    add_rules data
  end

  # A string representation of the grammar for debugging.
  # With +productions_too+ the rules themselves are included, otherwise
  # only their number.
  def inspect(productions_too = false)
    productions = productions_too ? @rules.inspect : @rules.size
    'Grammar(meta symbols: %s; alphabet: %s; productions: [%s]; start symbol: %s)' %
      [meta_symbols.join(', '), terminals.join(', '), productions, start_symbol]
  end

  # Parses +grammar+ (a String, or anything Rules.parse accepts) and
  # registers the symbols of every rule.
  #
  # The left side of the first rule seen becomes the start symbol.
  # Note that @rules is replaced by the result of Rules.parse.
  #
  # Syntax: see Grammar.
  def add_rules(grammar)
    @rules = Rules.parse grammar do |rule|
      @start_symbol ||= rule.left
      @meta_symbols << rule.left
      @terminals.merge rule.right.split('').select { |s| terminal? s }
    end
    @meta_symbols.uniq!
    update
  end

  # Returns a hash acting as FIRST operator, so that
  # <code>first["ABC"]</code> is FIRST(ABC).
  # See http://en.wikipedia.org/wiki/LL_parser "Constructing an LL(1) parsing table" for details.
  def first
    first_operator
  end

  # Returns a hash acting as FOLLOW operator, so that
  # <code>follow["A"]</code> is FOLLOW(A).
  # See http://en.wikipedia.org/wiki/LL_parser "Constructing an LL(1) parsing table" for details.
  def follow
    follow_operator
  end

  # Tests if the grammar is LL(1).
  #
  # Returns true only if, for every meta symbol, the FIRST sets of its
  # alternatives are pairwise disjoint and (when it can derive epsilon)
  # its FIRST and FOLLOW sets are disjoint. Nothing is raised.
  def ll1?
    @meta_symbols.all? do |meta|
      alternatives_disjoint?(meta) && !epsilon_conflict?(meta)
    end
  end

private

  # True if no symbol occurs in the FIRST sets of two alternatives of +meta+.
  def alternatives_disjoint?(meta)
    already_used = Set.new
    @rules[meta].all? do |alpha|
      first_set = first[alpha]
      if already_used.disjoint? first_set
        already_used.merge first_set
        true
      else
        false
      end
    end
  end

  # True if +meta+ can derive epsilon while FIRST(meta) and FOLLOW(meta) overlap.
  def epsilon_conflict?(meta)
    first_set = first[meta]
    first_set.include?(EPSILON) && !first_set.disjoint?(follow[meta])
  end

  # Lazily built and cached FIRST operator.
  def first_operator
    @first ||= FirstOperator.new self
  end

  # Lazily built and cached FOLLOW operator.
  def follow_operator
    @follow ||= FollowOperator.new self
  end

  # Drops the cached operators; called whenever the rules change.
  def update
    @first = @follow = nil
  end

end
+
# When this file is run directly, evaluate the Ruby code stored in its DATA section.
eval(DATA.read, nil, $0, __LINE__ + 4) if $0 == __FILE__
# (the last eval argument is the line number reported for the first evaluated line)
+
+require 'test/unit'
+
# Unit tests for Grammar: FIRST sets, FOLLOW sets and the LL(1) check.
class TestCaseGrammar < Test::Unit::TestCase

  include Grammar::Symbols

  LLError = Grammar::LLError

  # Raised by the checks below when a condition HOLDS, so that
  # assert_raise can be used for both outcomes.
  NoError = Class.new(Exception)

  TEST_GRAMMAR_1 = <<-EOG
S - ABCD
A - a
A -
B - b
B -
C - c
C -
D - S
D -
  EOG

  TEST_GRAMMAR_2 = <<-EOG
S - Ed
E - EpT
E - EmT
E - T
T - TuF
T - TdF
T - F
F - i
F - n
F - aEz
  EOG

  TEST_GRAMMAR_3 = <<-EOG
E - TD
D - pTD
D -
T - FS
S - uFS
S -
S - p
F - aEz
F - i
  EOG

  TEST_GRAMMAR_3b = <<-EOG
E - TD
D - pTD
D - PTD
D -
T - FS
S - uFS
S -
F - aEz
F - i
P - p
  EOG

  # Builds a Set from the characters of +s+, e.g. fifo('ab') == Set['a', 'b'].
  def fifo s
    Set[*s.split('')]
  end

  # Raises LLError as soon as two of the given FIRST sets share a symbol.
  # +firsts+ must keep one entry per alternative: turning it into a Set
  # would merge identical FIRST sets and hide exactly the overlap that
  # is being looked for.
  def check_disjoint firsts
    firsts.inject(Set.new) do |already_used, another_first_set|
      raise LLError, 'not disjoint!' unless already_used.disjoint? another_first_set
      already_used.merge another_first_set
    end
  end

  # Raises NoError if FIRST and FOLLOW are disjoint, LLError otherwise.
  def check_follow_condition first_set, follow_set
    raise NoError if first_set.disjoint? follow_set # this is fun :D
    raise LLError
  end

  def test_fifo
    assert_equal Set[], fifo('')
    assert_equal Set[EPSILON, END_OF_INPUT, 'x', 'Y'], fifo('?xY$')
  end

  def test_symbols
    assert EPSILON
    assert END_OF_INPUT
  end

  def test_first_1
    g = Grammar.new TEST_GRAMMAR_1

    f = nil
    assert_nothing_raised { f = g.first }
    assert_equal(Set['a', EPSILON], f['A'])
    assert_equal(Set['b', EPSILON], f['B'])
    assert_equal(Set['c', EPSILON], f['C'])
    assert_equal(Set['a', 'b', 'c', EPSILON], f['D'])
    assert_equal(f['D'], f['S'])
  end

  def test_follow_1
    g = Grammar.new TEST_GRAMMAR_1

    f = nil
    assert_nothing_raised { f = g.follow }
    assert_equal(Set['a', 'b', 'c', END_OF_INPUT], f['A'])
    assert_equal(Set['a', 'b', 'c', END_OF_INPUT], f['B'])
    assert_equal(Set['a', 'b', 'c', END_OF_INPUT], f['C'])
    assert_equal(Set[END_OF_INPUT], f['D'])
    assert_equal(Set[END_OF_INPUT], f['S'])
  end

  def test_first_2
    g = Grammar.new TEST_GRAMMAR_2

    f = nil
    assert_nothing_raised { f = g.first }
    %w(E F T S).each do |meta|
      assert_equal(Set['a', 'n', 'i'], f[meta])
    end
  end

  def test_follow_2
    g = Grammar.new TEST_GRAMMAR_2

    f = nil
    assert_nothing_raised { f = g.follow }
    assert_equal(Set['m', 'd', 'z', 'p'], f['E'])
    assert_equal(Set['m', 'd', 'z', 'p', 'u'], f['F'])
    assert_equal(Set['m', 'd', 'z', 'p', 'u'], f['T'])
    assert_equal(Set[END_OF_INPUT], f['S'])
  end

  def test_first_3
    g = Grammar.new TEST_GRAMMAR_3

    # Grammar 3 satisfies the FIRST condition: the FIRST sets of the
    # alternatives of every meta symbol are disjoint. (It still fails the
    # FOLLOW condition, see test_follow_3.)
    f = nil
    assert_nothing_raised { f = g.first }
    assert_equal(Set['a', 'i'], f['E'])
    assert_equal(Set[EPSILON, 'p'], f['D'])
    assert_equal(Set['a', 'i'], f['F'])
    assert_equal(Set['a', 'i'], f['T'])
    assert_equal(Set[EPSILON, 'u', 'p'], f['S'])
    g.meta_symbols.each do |m|
      firsts = g.rules[m].map { |x| f[x] }
      assert_nothing_raised { check_disjoint firsts }
    end
  end

  def test_follow_3
    g = Grammar.new TEST_GRAMMAR_3

    # Grammar 3 is not LL(1), because epsilon is in FIRST(S),
    # but FIRST(S) and FOLLOW(S) are not disjoint.
    f = nil
    assert_nothing_raised { f = g.follow }
    assert_equal(Set['z', END_OF_INPUT], f['E'])
    assert_equal(Set['z', END_OF_INPUT], f['D'])
    assert_equal(Set['z', 'p', 'u', END_OF_INPUT], f['F'])
    assert_equal(Set['p', 'z', END_OF_INPUT], f['T'])
    assert_equal(Set['p', 'z', END_OF_INPUT], f['S'])
    g.meta_symbols.each do |m|
      first_m = g.first[m]
      next unless first_m.include? EPSILON
      assert_raise(m == 'S' ? LLError : NoError) do
        check_follow_condition first_m, f[m]
      end
    end
  end

  def test_first_3b
    g = Grammar.new TEST_GRAMMAR_3b

    # Grammar 3b is NOT LL(1), since not all first-sets are disjoint.
    f = nil
    assert_nothing_raised { f = g.first }
    assert_equal(Set['a', 'i'], f['E'])
    assert_equal(Set[EPSILON, 'p'], f['D'])
    assert_equal(Set['p'], f['P'])
    assert_equal(Set['a', 'i'], f['F'])
    assert_equal(Set['a', 'i'], f['T'])
    assert_equal(Set[EPSILON, 'u'], f['S'])
    g.meta_symbols.each do |m|
      firsts = g.rules[m].map { |x| f[x] }
      assert_raise(m == 'D' ? LLError : NoError) do
        check_disjoint firsts
        raise NoError
      end
    end
  end

  def test_follow_3b
    g = Grammar.new TEST_GRAMMAR_3b

    # Although Grammar 3b is NOT LL(1), the FOLLOW-condition is satisfied.
    f = nil
    assert_nothing_raised { f = g.follow }
    assert_equal(fifo('z$'), f['E'], 'E')
    assert_equal(fifo('z$'), f['D'], 'D')
    assert_equal(fifo('ai'), f['P'], 'P')
    assert_equal(fifo('z$pu'), f['F'], 'F')
    assert_equal(fifo('z$p'), f['T'], 'T')
    assert_equal(fifo('z$p'), f['S'], 'S')
    g.meta_symbols.each do |m|
      first_m = g.first[m]
      next unless first_m.include? EPSILON
      assert_raise(NoError) do
        check_follow_condition first_m, f[m]
      end
    end
  end

  def test_ll1?
    assert_equal false, Grammar.new(TEST_GRAMMAR_3).ll1?, 'Grammar 3'
    assert_equal false, Grammar.new(TEST_GRAMMAR_3b).ll1?, 'Grammar 3b'
  end

  def test_new
    assert_nothing_raised { Grammar.new '' }
    assert_nothing_raised { Grammar.new TEST_GRAMMAR_1 }
    assert_nothing_raised { Grammar.new TEST_GRAMMAR_2 }
    assert_nothing_raised { Grammar.new TEST_GRAMMAR_3 }
    assert_nothing_raised { Grammar.new TEST_GRAMMAR_1 + TEST_GRAMMAR_2 + TEST_GRAMMAR_3 }
    assert_raise(ArgumentError) { Grammar.new 'S - ?' }
  end
end
+
+# vim:foldmethod=syntax
+
+#!/usr/bin/env ruby
+
+require 'fox12'
+
+include Fox
+
# Main window of the FIRST-set visualisation: a list of productions, a
# two-column result table and a text area for trace messages.
class Window < FXMainWindow
  def initialize(app)
    super(app, "#{app.appName}: First Set Calculation", nil, nil, DECOR_ALL, 0, 0, 800, 600, 0, 0)

    # {{{ menubar
    menubar = FXMenuBar.new(self, LAYOUT_SIDE_TOP|LAYOUT_FILL_X)
    filemenu = FXMenuPane.new(self)

    start_command = FXMenuCommand.new(filemenu, "&Start\tCtl-S\tStart the application.", nil, getApp())
    start_command.connect(SEL_COMMAND, method(:start))
    FXMenuCommand.new(filemenu, "&Quit\tAlt-F4\tQuit the application.", nil, getApp(), FXApp::ID_QUIT)
    FXMenuTitle.new(menubar, "&File", nil, filemenu)
    # }}} menubar

    # {{{ statusbar
    @statusbar = FXStatusBar.new(self, LAYOUT_SIDE_BOTTOM|LAYOUT_FILL_X|STATUSBAR_WITH_DRAGCORNER)
    # }}} statusbar

    # {{{ window content
    splitter = FXSplitter.new(self, SPLITTER_VERTICAL|LAYOUT_SIDE_TOP|LAYOUT_FILL)

    list_opts = LAYOUT_SIDE_TOP|LAYOUT_FILL_X|LAYOUT_FIX_HEIGHT|LIST_SINGLESELECT
    @productions = FXList.new(splitter, nil, 0, list_opts)
    @productions.height = 100

    @result = FXTable.new(splitter, nil, 0, LAYOUT_FILL)
    @result.height = 200
    @result.setTableSize(2, 2, false)
    @result.rowHeaderWidth = 0

    column_header = @result.columnHeader
    column_header.setItemText 0, 'X'
    column_header.setItemText 1, 'FIRST(X)'
    column_header.each do |item|
      item.justification = FXHeaderItem::CENTER_X
    end

    @debug = FXText.new(splitter, nil, 0, LAYOUT_SIDE_BOTTOM|LAYOUT_FILL_X|LAYOUT_FIX_HEIGHT)
    @debug.height = 200
    # }}} window content
  end

  # Builds a Grammar from the text +grammar+ (traced by a FirstTracer
  # bound to this window) and lists its rules in the productions list.
  def load_grammar grammar
    @tracer = FirstTracer.new(self)
    @grammar = Grammar.new grammar, @tracer
    @rules_indexes = Hash.new
    @grammar.rules.each_with_index do |production, index|
      @productions.appendItem production.inspect
      @rules_indexes[production] = index
    end
  end

  # Creates the window and shows it.
  def create
    super
    show(PLACEMENT_SCREEN)
  end

  # Selects +rule+ in the productions list, then pauses briefly.
  def rule rule
    index = @rules_indexes[rule]
    @productions.selectItem index
    sleep 0.1
  end

  # Shows +i+ in the window title, then pauses briefly.
  def iterate i
    setTitle i.to_s
    sleep 0.1
  end

  # Appends +what+ as a line to the debug text, then pauses briefly.
  def missing what
    @debug.appendText what + "\n"
    sleep 0.1
  end

  # Menu handler: computes the FIRST operator in a new thread; an error
  # is written to the debug text together with its backtrace.
  def start sender, sel, pointer
    Thread.new do
      begin
        @grammar.first
      rescue => boom
        report = [boom.to_s, *boom.backtrace]
        @debug.appendText report.join("\n")
      end
    end
  end

end
+
+$: << 'grammar'
+require 'grammar'
+
+require 'first_tracer'
+
app = FXApp.new("Shinobu", "cYcnus")

# create the window
window = Window.new app

# The grammar comes from the file named by the first command line
# argument; without arguments a built-in example is used.
grammar =
  if ARGV.empty?
    <<-EOG1
Z --> S
S --> Sb
S --> bAa
A --> aSc
A --> a
A --> aSb
    EOG1
  else
    File.read(ARGV.first)
  end

window.load_grammar grammar

app.create
app.run
+
+require 'erb'
+require 'ftools'
+require 'yaml'
+require 'redcloth'
+
# Object model of the book plus the YAML domain types that build it.
module WhyTheLuckyStiff
  class Book
    attr_accessor :author, :title, :terms, :image, :teaser,
      :chapters, :expansion_paks, :encoding, :credits

    # Looks up +x+ in @lang. For a missing key a warning is printed and
    # the warning text itself is returned.
    def [](x)
      @lang.fetch(x) do
        warn warning = "[not translated: '#{x}'!]"
        warning
      end
    end
  end

  # Loads the YAML document stored in +file_name+ and returns the result.
  # The file is closed again as soon as it has been read.
  def Book.load(file_name)
    File.open(file_name) { |file| YAML::load(file) }
  end

  # One numbered section of a chapter; the content is wrapped in RedCloth.
  class Section
    attr_accessor :index, :header, :content

    def initialize(i, h, c)
      @index, @header, @content = i, h, RedCloth::new(c.to_s)
    end
  end

  class Sidebar
    attr_accessor :title, :content
  end

  YAML::add_domain_type('whytheluckystiff.net,2003', 'sidebar') do |taguri, val|
    YAML::object_maker(Sidebar, 'title' => val.keys.first, 'content' => RedCloth::new(val.values.first))
  end

  # A chapter: entries of +sects+ that respond to +keys+ become numbered
  # Sections (counted from 1); all other entries are kept unchanged.
  class Chapter
    attr_accessor :index, :title, :sections

    def initialize(i, t, sects)
      @index = i
      @title = t
      section_number = 0
      @sections = sects.collect do |s|
        if s.respond_to?(:keys)
          section_number += 1
          Section.new(section_number, s.keys.first, s.values.first)
        else
          s
        end
      end
    end
  end

  YAML::add_domain_type('whytheluckystiff.net,2003', 'book') do |taguri, val|
    ['chapters', 'expansion_paks'].each do |chaptype|
      chapter_number = 0
      val[chaptype].collect! do |c|
        chapter_number += 1
        Chapter::new(chapter_number, c.keys.first, c.values.first)
      end
    end
    val['teaser'].collect! do |t|
      Section::new(1, t.keys.first, t.values.first)
    end
    val['terms'] = RedCloth::new(val['terms'])
    YAML::object_maker(Book, val)
  end

  class Image
    attr_accessor :file_name
  end

  YAML::add_domain_type('whytheluckystiff.net,2003', 'img') do |taguri, val|
    YAML::object_maker(Image, 'file_name' => "i/" + val)
  end
end
+
+#
+# Convert the book to HTML
+#
if __FILE__ == $0
  unless ARGV[0]
    puts "Usage: #{$0} [/path/to/save/html]"
    exit
  end

  # FileUtils replaces the File.makedirs / File.copy helpers of 'ftools'.
  require 'fileutils'

  site_path = ARGV[0]
  book = WhyTheLuckyStiff::Book::load( 'poignant.yml' )
  chapter = nil

  # Reads a template; File.read closes the file, unlike File.open(...).read.
  load_template = lambda { |file_name| ERB::new( File.read( file_name ) ) }

  # Renders +template+ within +scope+ and writes it to +page+ below site_path.
  write_page = lambda do |template, page, scope|
    File.open( File.join( site_path, page ), 'w' ) do |out|
      out << template.result( scope )
    end
  end

  # Write index page
  write_page.call( load_template.call( 'index.erb' ), 'index.html', binding )

  book.chapters = book.chapters[0,3] if ARGV.include? '-fast'

  # Write chapter pages
  chapter_tpl = load_template.call( 'chapter.erb' )
  book.chapters.each do |current_chapter|
    # Assign the outer local so that the template sees +chapter+; block
    # parameters are block-local since Ruby 1.9.
    chapter = current_chapter
    write_page.call( chapter_tpl, "chapter-#{ chapter.index }.html", binding )
  end
  exit if ARGV.include? '-fast'

  # Write expansion pak pages
  expak_tpl = load_template.call( 'expansion-pak.erb' )
  book.expansion_paks.each do |pak|
    write_page.call( expak_tpl, "expansion-pak-#{ pak.index }.html", binding )
  end

  # Write printable version
  write_page.call( load_template.call( 'print.erb' ), "print.html", binding )

  # Copy css + images into site
  copy_list = ["guide.css"] +
    Dir["i/*"].find_all { |image| image =~ /\.(gif|jpg|png)$/ }

  FileUtils.mkdir_p( File.join( site_path, "i" ) )
  copy_list.each do |copy_file|
    FileUtils.cp( copy_file, File.join( site_path, copy_file ) )
  end
end
+
+#!/usr/bin/env ruby
+
+require 'fox'
+begin
+ require 'opengl'
+rescue LoadError
+ require 'fox/missingdep'
+ MSG = <<EOM
+ Sorry, this example depends on the OpenGL extension. Please
+ check the Ruby Application Archives for an appropriate
+ download site.
+EOM
+ missingDependency(MSG)
+end
+
+
+include Fox
+include Math
+
+Deg2Rad = Math::PI / 180
+
+D_MAX = 6
+SQUARE_SIZE = 2.0 / D_MAX
+SQUARE_DISTANCE = 4.0 / D_MAX
+AMPLITUDE = SQUARE_SIZE
+LAMBDA = D_MAX.to_f / 2
+
# Main window of the OpenGL demo: a GL canvas showing a field of coloured
# squares and a pane of buttons that start and stop a rotation.
class GLTestWindow < FXMainWindow

  # How often our timer will fire (in milliseconds)
  TIMER_INTERVAL = 500

  # Rotate the boxes when a timer message is received
  def onTimeout(sender, sel, ptr)
    @angle += 10.0
#    @size = 0.5 + 0.2 * Math.cos(Deg2Rad * @angle)
    drawScene()
    @timer = getApp().addTimeout(TIMER_INTERVAL, method(:onTimeout))
  end

  # Rotate the boxes when a chore message is received
  def onChore(sender, sel, ptr)
    @angle += 10.0
#    @angle %= 360.0
#    @size = 0.5 + 0.2 * Math.cos(Deg2Rad * @angle)
    drawScene()
    @chore = getApp().addChore(method(:onChore))
  end

  # Draw the GL scene: for every grid position two squares lifted by a
  # position-dependent height and one square at height 0.
  def drawScene
    lightPosition = [15.0, 10.0, 5.0, 1.0]
    lightAmbient  = [ 0.1,  0.1, 0.1, 1.0]
    lightDiffuse  = [ 0.9,  0.9, 0.9, 1.0]

    width = @glcanvas.width.to_f
    height = @glcanvas.height.to_f
    aspect = width/height

    # Make context current
    @glcanvas.makeCurrent()

    GL.Viewport(0, 0, @glcanvas.width, @glcanvas.height)

    GL.ClearColor(1.0/256, 0.0, 5.0/256, 1.0)
    GL.Clear(GL::COLOR_BUFFER_BIT|GL::DEPTH_BUFFER_BIT)
    GL.Enable(GL::DEPTH_TEST)

    GL.Disable(GL::DITHER)

    GL.MatrixMode(GL::PROJECTION)
    GL.LoadIdentity()
    GLU.Perspective(30.0, aspect, 1.0, 100.0)

    GL.MatrixMode(GL::MODELVIEW)
    GL.LoadIdentity()
    GLU.LookAt(5.0, 10.0, 15.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0)

    GL.ShadeModel(GL::SMOOTH)
    GL.Light(GL::LIGHT0, GL::POSITION, lightPosition)
    GL.Light(GL::LIGHT0, GL::AMBIENT, lightAmbient)
    GL.Light(GL::LIGHT0, GL::DIFFUSE, lightDiffuse)
    GL.Enable(GL::LIGHT0)
    GL.Enable(GL::LIGHTING)

    GL.Rotated(0.1*@angle, 0.0, 1.0, 0.0)
    (-D_MAX..D_MAX).each do |x|
      (-D_MAX..D_MAX).each do |y|
        drawSquare(x, y, AMPLITUDE * (x + y - 2).abs, [1, 0, 0, 1])
        drawSquare(x, y, AMPLITUDE * (y - x + 1).abs, [0, 0, 1, 1])
        drawSquare(x, y, 0, [x/10.0, y/10.0, 0, 1])
      end
    end

    # Swap if it is double-buffered
    if @glvisual.isDoubleBuffer
      @glcanvas.swapBuffers
    end

    # Make context non-current
    @glcanvas.makeNonCurrent
  end

  def initialize(app)
    # Invoke the base class initializer
    super(app, "OpenGL Test Application", nil, nil, DECOR_ALL, 0, 0, 1024, 768)

    # Construct the main window elements
    frame = FXHorizontalFrame.new(self, LAYOUT_SIDE_TOP|LAYOUT_FILL_X|LAYOUT_FILL_Y)
    frame.padLeft, frame.padRight = 0, 0
    frame.padTop, frame.padBottom = 0, 0

    buildCanvasPane(frame)
    buildButtonPane(frame)

    # Make a tooltip
    FXTooltip.new(getApp())

    # Initialize private variables
    @spinning = false
    @chore = nil
    @timer = nil
    @angle = 0.0
    @size = 0.5
  end

  # Create and initialize
  def create
    super
    show(PLACEMENT_SCREEN)
  end

private

  # Draws one square for grid position (x, y), lifted by +elevation+,
  # using +color+ as ambient and diffuse material.
  def drawSquare(x, y, elevation, color)
    GL.PushMatrix
    GL.Material(GL::FRONT, GL::AMBIENT, color)
    GL.Material(GL::FRONT, GL::DIFFUSE, color)

    GL.Translated(y * SQUARE_DISTANCE, elevation, x * SQUARE_DISTANCE)

    GL.Begin(GL::TRIANGLE_STRIP)
    GL.Normal(1.0, 0.0, 0.0)
    GL.Vertex(-SQUARE_SIZE, +SQUARE_SIZE, -SQUARE_SIZE)
    GL.Vertex(-SQUARE_SIZE, +SQUARE_SIZE, +SQUARE_SIZE)
    GL.Vertex(+SQUARE_SIZE, +SQUARE_SIZE, -SQUARE_SIZE)
    GL.Vertex(+SQUARE_SIZE, +SQUARE_SIZE, +SQUARE_SIZE)
    GL.End

    GL.PopMatrix
  end

  # Builds the left pane: a label, a separator and the GL canvas.
  def buildCanvasPane(frame)
    fill = LAYOUT_FILL_X|LAYOUT_FILL_Y|LAYOUT_TOP|LAYOUT_LEFT

    # Left pane to contain the glcanvas
    glcanvasFrame = FXVerticalFrame.new(frame, fill)
    glcanvasFrame.padLeft, glcanvasFrame.padRight = 10, 10
    glcanvasFrame.padTop, glcanvasFrame.padBottom = 10, 10

    # Label above the glcanvas
    FXLabel.new(glcanvasFrame, "OpenGL Canvas Frame", nil,
      JUSTIFY_CENTER_X|LAYOUT_FILL_X)

    # Horizontal divider line
    FXHorizontalSeparator.new(glcanvasFrame, SEPARATOR_GROOVE|LAYOUT_FILL_X)

    # Sunken panel around the canvas
    glpanel = FXVerticalFrame.new(glcanvasFrame, FRAME_SUNKEN|FRAME_THICK|fill)
    glpanel.padLeft, glpanel.padRight = 0, 0
    glpanel.padTop, glpanel.padBottom = 0, 0

    # A visual to draw OpenGL
    @glvisual = FXGLVisual.new(getApp(), VISUAL_DOUBLEBUFFER)

    # Drawing glcanvas
    @glcanvas = FXGLCanvas.new(glpanel, @glvisual, nil, 0, fill)
    @glcanvas.connect(SEL_PAINT) {
      drawScene
    }
    @glcanvas.connect(SEL_CONFIGURE) {
      if @glcanvas.makeCurrent
        GL.Viewport(0, 0, @glcanvas.width, @glcanvas.height)
        @glcanvas.makeNonCurrent
      end
    }
  end

  # Builds the right pane with the spin, stop and exit buttons.
  def buildButtonPane(frame)
    # Right pane for the buttons
    buttonFrame = FXVerticalFrame.new(frame, LAYOUT_FILL_Y|LAYOUT_TOP|LAYOUT_LEFT)
    buttonFrame.padLeft, buttonFrame.padRight = 10, 10
    buttonFrame.padTop, buttonFrame.padBottom = 10, 10

    # Label above the buttons
    FXLabel.new(buttonFrame, "Button Frame", nil,
      JUSTIFY_CENTER_X|LAYOUT_FILL_X)

    # Horizontal divider line
    FXHorizontalSeparator.new(buttonFrame, SEPARATOR_RIDGE|LAYOUT_FILL_X)

    # Spin according to timer
    spinTimerBtn = addButton(buttonFrame,
      "Spin &Timer\tSpin using interval timers\n" +
      "Note the app blocks until the interval has elapsed...")
    spinTimerBtn.connect(SEL_COMMAND) {
      @spinning = true
      @timer = getApp().addTimeout(TIMER_INTERVAL, method(:onTimeout))
    }
    spinTimerBtn.connect(SEL_UPDATE) { |sender, sel, ptr|
      @spinning ? sender.disable : sender.enable
    }

    # Spin according to chore
    spinChoreBtn = addButton(buttonFrame,
      "Spin &Chore\tSpin as fast as possible using chores\n" +
      "Note even though the app is very responsive, it never blocks;\n" +
      "there is always something to do...")
    spinChoreBtn.connect(SEL_COMMAND) {
      @spinning = true
      @chore = getApp().addChore(method(:onChore))
    }
    spinChoreBtn.connect(SEL_UPDATE) { |sender, sel, ptr|
      @spinning ? sender.disable : sender.enable
    }

    # Stop spinning
    stopBtn = addButton(buttonFrame,
      "&Stop Spin\tStop this mad spinning, I'm getting dizzy")
    stopBtn.connect(SEL_COMMAND) {
      @spinning = false
      if @timer
        getApp().removeTimeout(@timer)
        @timer = nil
      end
      if @chore
        getApp().removeChore(@chore)
        @chore = nil
      end
    }
    stopBtn.connect(SEL_UPDATE) { |sender, sel, ptr|
      @spinning ? sender.enable : sender.disable
    }

    # Exit button
    addButton(buttonFrame, "&Exit\tExit the application", getApp(), FXApp::ID_QUIT)
  end

  # Creates a button in +parent+ with the layout and padding shared by all
  # buttons of this window. +target+ and +selector+ are handed to FXButton.
  def addButton(parent, text, target = nil, selector = 0)
    button = FXButton.new(parent, text, nil, target, selector,
      FRAME_THICK|FRAME_RAISED|LAYOUT_FILL_X|LAYOUT_TOP|LAYOUT_LEFT)
    button.padLeft, button.padRight = 10, 10
    button.padTop, button.padBottom = 5, 5
    button
  end
end
+
# Entry point: only runs when this file is executed directly.
if $0 == __FILE__
  gl_app = FXApp.new("GLTest", "FoxTest")

  # The chores-based spin runs fastest when FXRuby's event loop does not
  # try to schedule other Ruby threads. Disabling that would be fine here,
  # because this program starts no other threads:
  #
  #gl_app.disableThreads

  # The main window registers itself with the application.
  GLTestWindow.new(gl_app)

  # Create the windows, then enter the event loop.
  gl_app.create
  gl_app.run
end
+
# One coloured sticker of the cube.
class Facelet
  attr_accessor :color

  # +color+ is stored as given.
  def initialize(color)
    self.color = color
  end

  # A facelet is displayed as its colour.
  def to_s
    color
  end
end
+
# A group of facelets together with a snapshot of the colours they had
# when the Edge was created.
class Edge
  attr_accessor :facelets, :colors

  # Remembers +facelets+ and copies their current colours into #colors.
  def initialize(facelets)
    @facelets = facelets
    @colors = facelets.collect { |facelet| facelet.color }
  end

  # Paints this edge's facelets with the colour snapshot of +edge+,
  # position by position. #colors of this edge is not updated.
  def apply(edge)
    @facelets.each_index do |position|
      @facelets[position].color = edge.colors[position]
    end
  end

  # Two facelets on the first line, three on the second.
  def inspect
    format("\n%s %s\n%s %s %s", *facelets)
  end
end
+
# One side of the cube: nine facelets plus the four neighbouring sides.
class Side
  attr_reader :num, :facelets
  attr_accessor :sides

  # Template for #inspect: digits are facelet positions, letters A-D
  # stand for the neighbouring sides 0-3.
  LAYOUT = <<-EOS
 A
 0 1 2
 D 3 4 5 B
 6 7 8
 C
  EOS

  def initialize(num)
    @num = num
    @sides = []
    @facelets = []
    @fl_by_side = {}
  end

  # facelets & sides
  #     0
  #   0 1 2
  # 3 3 4 5 1
  #   6 7 8
  #     2

  # Replaces the facelets by new Facelets with the given colours and
  # records which facelet position touches which neighbouring side(s).
  # #sides must have been assigned before.
  def facelets=(facelets)
    @facelets = facelets.map { |c| Facelet.new(c) }
    init_facelet 0, 3,0
    init_facelet 1, 0
    init_facelet 2, 0,1
    init_facelet 3, 3
    init_facelet 5, 1
    init_facelet 6, 2,3
    init_facelet 7, 2
    init_facelet 8, 1,2
  end

  # Sides are ordered by their number.
  def <=>(side)
    self.num <=> side.num
  end

  # Registers facelet position +pos+ as the one adjacent to the
  # neighbours with the given indexes into #sides.
  def init_facelet(pos, *side_nums)
    sides = side_nums.map { |num| @sides[num] }.sort
    @fl_by_side[sides] = pos
  end

  def []=(color, *sides)
    @facelets[@fl_by_side[sides.sort]].color = color
  end

  # Each argument is an array of neighbouring sides; returns the facelet
  # adjacent to exactly those sides, in argument order.
  def values_at(*sides)
    sides.map { |neighbours| @facelets[@fl_by_side[neighbours.sort]] }
  end

  # With a +range+: the facelets at these positions, separated by spaces.
  # Without: the whole side including the numbers of its neighbours.
  def inspect(range=nil)
    if range
      @facelets.values_at(*(range.to_a)).join(' ')
    else
      # A single pass, so that substituted colours are never looked at again.
      LAYOUT.gsub(/\d|[ABCD]/) do |key|
        if key =~ /\d/
          @facelets[key.to_i].to_s
        else
          # 'ABCD'.index works no matter whether characters are Integers
          # (Ruby 1.8) or Strings (1.9 and later).
          @sides['ABCD'.index(key)].num.to_s
        end
      end
    end
  end

  # Builds the Edge shared with neighbour number +side+: two facelets of
  # this side followed by three facelets of that neighbour.
  def get_edge(side)
    trio = (-1..1).map { |x| (side + x) % 4 }
    prev_side, this_side, next_side = @sides.values_at(*trio)
    e = Edge.new(
      self     .values_at( [this_side], [this_side, next_side] ) +
      this_side.values_at( [self, prev_side], [self], [self, next_side] )
    )
    #puts 'Edge created for side %d: ' % side + e.inspect
    e
  end

  # Turns the side: every edge takes over the colours that the edge
  # +dir+ positions before it had when the turn started.
  def turn(dir)
    #p 'turn side %d in %d' % [num, dir]
    edges = (0..3).map { |n| get_edge n }
    edges.each_index do |i|
      edges[i].apply edges[(i-dir) % 4]
    end
  end
end
+
# The cube: six Sides wired to their neighbours.
class Cube
  # Side names in the order of their side numbers 0..5.
  SIDE_NAMES = %w(left front right back top bottom)

  # Creates the six sides (each is also stored in an instance variable
  # named after it) and tells every side its four neighbours.
  def initialize
    @sides = []
    SIDE_NAMES.each_with_index do |side, i|
      # instance_variable_set instead of eval on a built string
      @sides[i] = instance_variable_set("@#{side}", Side.new(i))
    end
    @left.sides   = [@top, @front, @bottom, @back]
    @front.sides  = [@top, @right, @bottom, @left]
    @right.sides  = [@top, @back, @bottom, @front]
    @back.sides   = [@top, @left, @bottom, @right]
    @top.sides    = [@back, @right, @front, @left]
    @bottom.sides = [@front, @right, @back, @left]
  end

  # Reads the colours of all sides from the text +fs+: three rows for the
  # top, three rows holding left, front, right and back next to each
  # other, and three rows for the bottom. Every colour is one word
  # character; whitespace is not significant.
  def read_facelets(fs)
    # The template is turned into a regexp: each letter matches any word
    # character, each run of whitespace any amount of whitespace.
    pattern = Regexp.new(<<-EOP.gsub(/\w/, '\w').gsub(/\s+/, '\s*'))
      (w w w)
      (w w w)
      (w w w)
(r r r) (g g g) (b b b) (o o o)
(r r r) (g g g) (b b b) (o o o)
(r r r) (g g g) (b b b) (o o o)
      (y y y)
      (y y y)
      (y y y)
    EOP
    md = pattern.match(fs).to_a

    @top.facelets    = parse_facelets(md.values_at(1,2,3))
    @left.facelets   = parse_facelets(md.values_at(4,8,12))
    @front.facelets  = parse_facelets(md.values_at(5,9,13))
    @right.facelets  = parse_facelets(md.values_at(6,10,14))
    @back.facelets   = parse_facelets(md.values_at(7,11,15))
    @bottom.facelets = parse_facelets(md.values_at(16,17,18))
  end

  # Turns side number +side+ in direction +dir+.
  def turn(side, dir)
    #p 'turn %d in %d' % [side, dir]
    @sides[side].turn(dir)
    #puts inspect
  end

  # Every "side:from-to" entry of the template is replaced by the
  # facelets from..to of that side.
  def inspect
    <<-EOF.gsub(/(\d):(\d)-(\d)/) { @sides[$1.to_i].inspect(Range.new($2.to_i, $3.to_i)) }
 4:0-2
 4:3-5
 4:6-8
0:0-2 1:0-2 2:0-2 3:0-2
0:3-5 1:3-5 2:3-5 3:3-5
0:6-8 1:6-8 2:6-8 3:6-8
 5:0-2
 5:3-5
 5:6-8
    EOF
  end

private
  # Joins the rows, drops the spaces and returns the single characters.
  def parse_facelets(rows)
    rows.join.delete(' ').split(//)
  end
end
+
+#$stdin = DATA
+
# Input: the number of scenarios; per scenario nine lines describing the
# cube, the number of turns, and one "side direction" line per turn.
scenario_count = gets.to_i
scenario_count.times do |index|
  puts "Scenario ##{index+1}:"

  layout = ''
  9.times { layout << gets }

  cube = Cube.new
  cube.read_facelets layout

  turn_count = gets.to_i
  turn_count.times do
    side, dir = gets.split.map { |word| word.to_i }
    cube.turn(side, dir)
  end

  puts cube.inspect
  puts
end
+
+# 2004 by murphy <korny@cYcnus.de>
+# GPL
# One scenario: the appointments of all team members, and the search for
# time spans in which a team meeting is possible.
class Scenario
  # A point in time stored as year, month, day, hour, minute, second.
  # Missing trailing components count as 0.
  class TimePoint
    attr_reader :data

    def initialize *data
      @data = data
    end

    # Component number +i+, or 0 if it was not given.
    def [] i
      @data[i] or 0
    end

    include Comparable

    # Compares component by component, the first difference decides.
    def <=> tp
      [@data.size, tp.data.size].max.times do |i|
        r = self[i] <=> tp[i]
        return r if r.nonzero?
      end
      0
    end

    # Array of the differences between corresponding components. The
    # result is NOT normalised (23:30 -> 00:10 on the next day yields
    # day +1, hour -23, minute -20), so it must not be used to compare
    # durations; see #to_time.
    def - tp
      (0...[@data.size, tp.data.size].max).map { |i| self[i] - tp[i] }
    end

    # The same moment as a Time (UTC), for real calendar arithmetic.
    def to_time
      Time.utc(*@data)
    end

    def inspect
      # 01/01/1800 00:00:00
      format('%02d/%02d/%04d %02d:%02d:%02d', *[1, 2, 0, 3, 4, 5].map { |i| self[i] })
    end
  end

  ONE_HOUR = TimePoint.new 0, 0, 0, 1, 0, 0

  # Minimum length of a meeting, in seconds.
  MIN_MEETING_SECONDS = 60 * 60

  APPOINTMENT_PATTERN = /
    ( \d{4} ) \s ( \d{2} ) \s ( \d{2} ) \s ( \d{2} ) \s ( \d{2} ) \s ( \d{2} ) \s
    ( \d{4} ) \s ( \d{2} ) \s ( \d{2} ) \s ( \d{2} ) \s ( \d{2} ) \s ( \d{2} )
  /x

  # Reads one scenario from +io+: the team size, then per member the
  # number of appointments followed by one appointment per line.
  #
  # @data becomes a list of [TimePoint, change] events, where change is
  # the change in the number of available members at that moment.
  # Raises ArgumentError for a missing or malformed appointment line.
  def initialize io
    @team_size = io.gets.to_i
    @data = [ [TimePoint.new(1800, 01, 01, 00, 00, 00), @team_size] ]
    @team_size.times do  # each team member
      io.gets.to_i.times do  # each appointment
        line = io.gets
        m = line && APPOINTMENT_PATTERN.match(line)
        raise ArgumentError, 'malformed appointment: %p' % line unless m
        @data << [TimePoint.new(*m.captures[0,6].map { |x| x.to_i }), -1]
        @data << [TimePoint.new(*m.captures[6,6].map { |x| x.to_i }), +1]
      end
    end
    @data << [TimePoint.new(2200, 01, 01, 00, 00, 00), -@team_size]
  end

  # Prints every time span in which a meeting is possible, or
  # 'no appointment possible' if there is none.
  def print_time_plan
    available = 0
    meeting_start = nil
    no_appointment = true
    # Events at the same moment are ordered so that members becoming free
    # come before members becoming busy; an appointment that starts at the
    # very moment another one ends then does not interrupt a span.
    events = @data.sort_by { |time_point, change| [time_point, -change] }
    events.each do |time_point, change|
      available += change
      # at any time during the meeting, at least two team members need to be there
      # and at most one team member is allowed to be absent
      if available >= 2 and (@team_size - available) <= 1
        meeting_start ||= time_point
      elsif meeting_start
        # the meeting should be at least one hour in length; measured in
        # real seconds, because component-wise differences are misleading
        # across hour, day or month boundaries
        if time_point.to_time - meeting_start.to_time >= MIN_MEETING_SECONDS
          puts 'appointment possible from %p to %p' % [meeting_start, time_point]
          no_appointment = false
        end
        meeting_start = nil
      end
    end
    puts 'no appointment possible' if no_appointment
  end
end
+
+# read the data
# read the data: the first line of DATA holds the number of scenarios
scenario_count = DATA.gets.to_i
scenario_count.times do |index|  # each scenario
  puts 'Scenario #%d:' % (index + 1)
  Scenario.new(DATA).print_time_plan
  puts
end
+
+#__END__
+2
+3
+3
+2002 06 28 15 00 00 2002 06 28 18 00 00 TUD Contest Practice Session
+2002 06 29 10 00 00 2002 06 29 15 00 00 TUD Contest
+2002 11 15 15 00 00 2002 11 17 23 00 00 NWERC Delft
+4
+2002 06 25 13 30 00 2002 06 25 15 30 00 FIFA World Cup Semifinal I
+2002 06 26 13 30 00 2002 06 26 15 30 00 FIFA World Cup Semifinal II
+2002 06 29 13 00 00 2002 06 29 15 00 00 FIFA World Cup Third Place
+2002 06 30 13 00 00 2002 06 30 15 00 00 FIFA World Cup Final
+1
+2002 06 01 00 00 00 2002 06 29 18 00 00 Preparation of Problem Set
+2
+1
+1800 01 01 00 00 00 2200 01 01 00 00 00 Solving Problem 8
+0
+
+require 'token_consts'
+require 'symbol'
+require 'ctype'
+require 'error'
+
# Defined on Integer rather than Fixnum: every Fixnum is an Integer, and
# the Fixnum constant itself no longer exists in current Ruby versions.
class Integer
  # Treat the receiver as the character code of a digit and return its
  # value as an Integer.
  # Returns nonsense for non-digits.
  # Examples (on Ruby 1.9 and later a character is a String, so its
  # code has to be taken with +ord+ first):
  # <code>
  #  '1.8.2'[0].ord.digit == 1
  # </code>
  #
  # <code>
  #  ?6.ord.digit == 6
  # </code>
  #
  # <code>
  #  ?A.ord.digit == 17
  # </code>
  def digit
    # ?0.ord is the character code of '0', whether ?0 is an Integer
    # (Ruby 1.8.7) or a one-character String (1.9 and later).
    self - ?0.ord
  end
end
+
+##
+# Stellt einen einfachen Scanner für die lexikalische Analyse der Sprache Pas-0 dar.
+#
+# @author Andreas Kunert
+# Ruby port by murphy
+class Scanner
+
+ include TokenConsts
+
+ attr_reader :line, :pos
+
+ # To allow Scanner.new without parameters.
+ DUMMY_INPUT = 'dummy file'
+ def DUMMY_INPUT.getc
+ nil
+ end
+
+ ##
+ # Erzeugt einen Scanner, der als Eingabe das übergebene IO benutzt.
+ def initialize input = DUMMY_INPUT
+ @line = 1
+ @pos = 0
+
+ begin
+ @input = input
+ @next_char = @input.getc
+ rescue IOError # TODO show the reason!
+ Error.ioError
+ raise
+ end
+ end
+
+ ##
+ # Reads the next character from the input.
+ def read_next_char
+ begin
+ @pos += 1
+ @current_char = @next_char
+ @next_char = @input.getc
+ rescue IOError
+ Error.ioError
+ raise
+ end
+ end
+
+ ##
+ # Sucht das nächste Symbol, identifiziert es, instantiiert ein entsprechendes
+ # PascalSymbol-Objekt und gibt es zurück.
+ # @see Symbol
+ # @return das gefundene Symbol als PascalSymbol-Objekt
+ def get_symbol
+ current_symbol = nil
+ until current_symbol
+ read_next_char
+
+ if @current_char.alpha?
+ identifier = @current_char.chr
+ while @next_char.alpha? or @next_char.digit?
+ identifier << @next_char
+ read_next_char
+ end
+ current_symbol = handle_identifier(identifier.upcase)
+ elsif @current_char.digit?
+ current_symbol = number
+ else
+ case @current_char
+ when ?\s
+ # ignore
+ when ?\n
+ new_line
+ when nil
+ current_symbol = PascalSymbol.new EOP
+ when ?{
+ comment
+
+ when ?:
+ if @next_char == ?=
+ read_next_char
+ current_symbol = PascalSymbol.new BECOMES
+ else
+ current_symbol = PascalSymbol.new COLON
+ end
+
+ when ?<
+ if (@next_char == ?=)
+ read_next_char
+ current_symbol = PascalSymbol.new LEQSY
+ elsif (@next_char == ?>)
+ read_next_char
+ current_symbol = PascalSymbol.new NEQSY
+ else
+ current_symbol = PascalSymbol.new LSSSY
+ end
+
+ when ?>
+ if (@next_char == ?=)
+ read_next_char
+ current_symbol = PascalSymbol.new GEQSY
+ else
+ current_symbol = PascalSymbol.new GRTSY
+ end
+
+ when ?. then current_symbol = PascalSymbol.new PERIOD
+ when ?( then current_symbol = PascalSymbol.new LPARENT
+ when ?, then current_symbol = PascalSymbol.new COMMA
+ when ?* then current_symbol = PascalSymbol.new TIMES
+ when ?/ then current_symbol = PascalSymbol.new SLASH
+ when ?+ then current_symbol = PascalSymbol.new PLUS
+ when ?- then current_symbol = PascalSymbol.new MINUS
+ when ?= then current_symbol = PascalSymbol.new EQLSY
+ when ?) then current_symbol = PascalSymbol.new RPARENT
+ when ?; then current_symbol = PascalSymbol.new SEMICOLON
+ else
+ Error.error(100, @line, @pos) if @current_char > ?\s
+ end
+ end
+ end
+ current_symbol
+ end
+
+private
+ ##
+ # Versucht, in dem gegebenen String ein Schlüsselwort zu erkennen.
+ # Sollte dabei ein Keyword gefunden werden, so gibt er ein PascalSymbol-Objekt zurück, das
+ # das entsprechende Keyword repräsentiert. Ansonsten besteht die Rückgabe aus
+ # einem SymbolIdent-Objekt (abgeleitet von PascalSymbol), das den String 1:1 enthält
+ # @see symbol
+ # @return falls Keyword gefunden, zugehöriges PascalSymbol, sonst SymbolIdent
+ def handle_identifier identifier
+ if sym = KEYWORD_SYMBOLS[identifier]
+ PascalSymbol.new sym
+ else
+ SymbolIdent.new identifier
+ end
+ end
+
MAXINT = 2**31 - 1
MAXINT_DIV_10 = MAXINT / 10
MAXINT_MOD_10 = MAXINT % 10
##
# Tries to assemble a number from the current character and the ones after it.
# The straightforward algorithm is used: for every further digit the value so
# far is multiplied by 10 and the digit is added. The only special cases are
# real numbers (a fractional part after "." and/or an exponent after "E").
#
# If neither a period nor an E occurs, a SymbolIntcon is returned, otherwise
# (real number) a SymbolRealcon. Both symbols carry the numeric value.
#
# Error codes reported through Error.error:
#   101 - no digit after "." or after "E" / "E-"
#   102 - integer literal with more digits than fit into MAXINT
#
# Note: this would be much easier with the Java/Ruby API. It is implemented by
# hand anyway in order to demonstrate the algorithm.
# NOTE(review): #digit and #digit? are defined outside this chunk; presumably
# #digit is the character's numeric value and #digit? tests for "0".."9".
# @see symbol
# @return SymbolIntcon or SymbolRealcon object holding the numeric value
def number
  is_integer = true
  integer_too_long = false
  exponent = 0
  exp_counter = -1
  exp_sign = 1

  integer_mantisse = @current_char.digit

  # Consume digits only while the result still fits into MAXINT. The digit
  # test has to guard BOTH overflow cases: without it, a non-digit following a
  # mantissa equal to MAXINT / 10 could be folded into the number.
  while @next_char.digit? &&
        (integer_mantisse < MAXINT_DIV_10 ||
         (integer_mantisse == MAXINT_DIV_10 && @next_char.digit <= MAXINT_MOD_10))
    integer_mantisse *= 10
    integer_mantisse += @next_char.digit
    read_next_char
  end

  real_mantisse = integer_mantisse

  # Any digits left over did not fit into an integer; keep accumulating them
  # for the real-number case and remember the overflow for the integer case.
  while @next_char.digit?
    integer_too_long = true
    real_mantisse *= 10
    real_mantisse += @next_char.digit
    read_next_char
  end
  if @next_char == ?.
    read_next_char
    is_integer = false
    unless @next_char.digit?
      Error.error 101, @line, @pos
    end
    # exp_counter runs -1, -2, ... so each digit lands one decimal place lower
    while @next_char.digit?
      real_mantisse += @next_char.digit * (10 ** exp_counter)
      read_next_char
      exp_counter -= 1
    end
  end
  if @next_char == ?E
    is_integer = false
    read_next_char
    if @next_char == ?-
      exp_sign = -1
      read_next_char
    end
    unless @next_char.digit?
      Error.error 101, @line, @pos
    end
    while @next_char.digit?
      exponent *= 10
      exponent += @next_char.digit
      read_next_char
    end
  end

  if is_integer
    if integer_too_long
      Error.error 102, @line, @pos
    end
    SymbolIntcon.new integer_mantisse
  else
    SymbolRealcon.new real_mantisse * (10 ** (exp_sign * exponent))
  end
end
+
##
# Skips over a comment.
# All characters up to the closing brace are simply read and discarded;
# line breaks inside the comment still advance the line counter.
def comment
  loop do
    break if @current_char == ?}
    forbid_eop
    new_line if @current_char == ?\n
    read_next_char
  end
end
+
# Registers a line break: advances the line counter and resets the column.
def new_line
  @line = @line.succ
  @pos = 0
end
+
# Guards loops that must not run past the end of the program text.
# When the input is exhausted, error 103 is reported at the current position
# and the process exits. While characters remain, the method simply returns so
# the caller can keep scanning (previously it exited unconditionally).
def forbid_eop
  return unless eop?
  Error.error 103, @line, @pos
  exit
end
+
# End of program reached? True exactly when the current character is nil.
def eop?
  @current_char == nil
end
+end
+
+##
+# Läßt ein Testprogramm ablaufen.
+# Dieses erzeugt sich ein Scanner-Objekt und ruft an diesem kontinuierlich bis zum Dateiende
+# get_symbol auf.
+if $0 == __FILE__
+ scan = Scanner.new(File.new(ARGV[0] || 'test.pas'))
+ loop do
+ c = scan.get_symbol
+ puts c
+ break if c.typ == TokenConsts::EOP
+ end
+end
+# -*- ruby -*-
+
+# Local variables:
+# indent-tabs-mode: nil
+# ruby-indent-level: 4
+# End:
+
+# @@PLEAC@@_NAME
+# @@SKIP@@ Ruby
+
+# @@PLEAC@@_WEB
+# @@SKIP@@ http://www.ruby-lang.org
+
+
+# @@PLEAC@@_1.0
+string = '\n' # two characters, \ and an n
+string = 'Jon \'Maddog\' Orwant' # literal single quotes
+
+string = "\n" # a "newline" character
+string = "Jon \"Maddog\" Orwant" # literal double quotes
+
+string = %q/Jon 'Maddog' Orwant/ # literal single quotes
+
+string = %q[Jon 'Maddog' Orwant] # literal single quotes
+string = %q{Jon 'Maddog' Orwant} # literal single quotes
+string = %q(Jon 'Maddog' Orwant) # literal single quotes
+string = %q<Jon 'Maddog' Orwant> # literal single quotes
+
+a = <<"EOF"
+This is a multiline here document
+terminated by EOF on a line by itself
+EOF
+
+
+# @@PLEAC@@_1.1
+value = string[offset,count]
+value = string[offset..-1]
+
+string[offset,count] = newstring
+string[offset..-1] = newtail
+
+# in Ruby we can also specify intervals by their two offsets
+value = string[offset..offs2]
+string[offset..offs2] = newstring
+
+leading, s1, s2, trailing = data.unpack("A5 x3 A8 A8 A*")
+
+fivers = string.unpack("A5" * (string.length/5))
+
+chars = string.unpack("A1" * string.length)
+
+string = "This is what you have"
+# +012345678901234567890 Indexing forwards (left to right)
+# 109876543210987654321- Indexing backwards (right to left)
+# note that 0 means 10 or 20, etc. above
+
+first = string[0, 1] # "T"
+start = string[5, 2] # "is"
+rest = string[13..-1] # "you have"
+last = string[-1, 1] # "e"
+end_ = string[-4..-1] # "have"
+piece = string[-8, 3] # "you"
+
+string[5, 2] = "wasn't" # change "is" to "wasn't"
+string[-12..-1] = "ondrous" # "This wasn't wondrous"
+string[0, 1] = "" # delete first character
+string[-10..-1] = "" # delete last 10 characters
+
+if string[-10..-1] =~ /pattern/
+ puts "Pattern matches in last 10 characters"
+end
+
+string[0, 5].gsub!(/is/, 'at')
+
+a = "make a hat"
+a[0, 1], a[-1, 1] = a[-1, 1], a[0, 1]
+
+a = "To be or not to be"
+b = a.unpack("x6 A6")
+
+b, c = a.unpack("x6 A2 X5 A2")
+puts "#{b}\n#{c}\n"
+
# Turns a list of 1-based column positions into an unpack template:
# one "A<width>" field per gap between consecutive positions, followed by a
# final "A*" that takes the rest of the string.
def cut2fmt(*args)
  previous = 1
  fields = args.map do |column|
    width = column - previous
    previous = column
    "A#{width} "
  end
  fields.join + "A*"
end
+
+fmt = cut2fmt(8, 14, 20, 26, 30)
+
+
+# @@PLEAC@@_1.2
+# careful! "b is true" doesn't mean "b != 0" (0 is true in Ruby)
+# thus no problem of "defined" later since only nil is false
+# the following sets to `c' if `b' is nil or false
+a = b || c
+
+# if you need Perl's behaviour (setting to `c' if `b' is 0) the most
+# effective way is to use Numeric#nonzero? (thanks to Dave Thomas!)
+a = b.nonzero? || c
+
+# you will still want to use defined? in order to test
+# for scope existence of a given object
+a = defined?(b) ? b : c
+
+dir = ARGV.shift || "/tmp"
+
+
+# @@PLEAC@@_1.3
+v1, v2 = v2, v1
+
+alpha, beta, production = %w(January March August)
+alpha, beta, production = beta, production, alpha
+
+
+# @@PLEAC@@_1.4
+num = char[0]
+char = num.chr
+
+# Ruby also supports having a char from character constant
+num = ?r
+
+char = sprintf("%c", num)
+printf("Number %d is character %c\n", num, num)
+
+ascii = string.unpack("C*")
+string = ascii.pack("C*")
+
+hal = "HAL"
+ascii = hal.unpack("C*")
+# We can't use Array#each since we can't mutate a Fixnum
+ascii.collect! { |i|
+ i + 1 # add one to each ASCII value
+}
+ibm = ascii.pack("C*")
+puts ibm
+
+
+# @@PLEAC@@_1.5
+array = string.split('')
+
+array = string.unpack("C*")
+
+string.scan(/./) { |b|
+ # do something with b
+}
+
+string = "an apple a day"
+print "unique chars are: ", string.split('').uniq.sort, "\n"
+
+sum = 0
+for ascval in string.unpack("C*") # or use Array#each for a pure OO style :)
+ sum += ascval
+end
+puts "sum is #{sum & 0xffffffff}" # since Ruby will go Bignum if necessary
+
+# @@INCLUDE@@ include/ruby/slowcat.rb
+
+
+# @@PLEAC@@_1.6
+revbytes = string.reverse
+
+revwords = string.split(" ").reverse.join(" ")
+
+revwords = string.split(/(\s+)/).reverse.join
+
+# using the fact that IO is Enumerable, you can directly "select" it
+long_palindromes = File.open("/usr/share/dict/words").
+ select { |w| w.chomp!; w.reverse == w && w.length > 5 }
+
+
+# @@PLEAC@@_1.7
# Expand each run of tabs to the next multiple-of-8 column, one run per pass.
# The pattern has to be a Regexp: a String pattern "\t+" would only match the
# two literal characters tab and plus.
while string.sub!(/\t+/) { ' ' * ($&.length * 8 - $`.length % 8) }
end
+
+
+# @@PLEAC@@_1.8
+'You owe #{debt} to me'.gsub(/\#{(\w+)}/) { eval($1) }
+
+rows, cols = 24, 80
+text = %q(I am #{rows} high and #{cols} long)
+text.gsub!(/\#{(\w+)}/) { eval("#{$1}") }
+puts text
+
+'I am 17 years old'.gsub(/\d+/) { 2 * $&.to_i }
+
+
+# @@PLEAC@@_1.9
+e = "bo peep".upcase
+e.downcase!
+e.capitalize!
+
+"thIS is a loNG liNE".gsub!(/\w+/) { $&.capitalize }
+
+
+# @@PLEAC@@_1.10
+"I have #{n+1} guanacos."
+print "I have ", n+1, " guanacos."
+
+
+# @@PLEAC@@_1.11
+var = <<'EOF'.gsub(/^\s+/, '')
+ your text
+ goes here
+EOF
+
+
+# @@PLEAC@@_1.12
+string = "Folding and splicing is the work of an editor,\n"+
+ "not a mere collection of silicon\n"+
+ "and\n"+
+ "mobile electrons!"
+
# Re-flows the whitespace-separated words of +str+ into lines that are shorter
# than +max_size+ characters and returns them joined with "\n".
#
# The joining space is counted when deciding whether a word still fits, and a
# line is only flushed when it has content. A single word that is longer than
# the limit is therefore placed on a line of its own instead of producing an
# empty line.
def wrap(str, max_size)
  lines = []
  current = ''
  str.split.each do |word|
    candidate = current.empty? ? word : "#{current} #{word}"
    if candidate.length >= max_size && !current.empty?
      lines << current
      current = word
    else
      current = candidate
    end
  end
  lines.push(current).join("\n")
end
+
+print wrap(string, 20)
+#=> Folding and
+#=> splicing is the
+#=> work of an editor,
+#=> not a mere
+#=> collection of
+#=> silicon and mobile
+#=> electrons!
+
+
+# @@PLEAC@@_1.13
+string = %q(Mom said, "Don't do that.")
+string.gsub(/['"]/) { '\\'+$& }
+string.gsub(/['"]/, '\&\&')
+string.gsub(/[^A-Z]/) { '\\'+$& }
+"is a test!".gsub(/\W/) { '\\'+$& } # no function like quotemeta?
+
+
+# @@PLEAC@@_1.14
+string.strip!
+
+
+# @@PLEAC@@_1.15
# Splits one CSV line into its fields.
# Each match is either a double-quoted field (backslash escapes are kept
# verbatim, the surrounding quotes are dropped), an unquoted run of non-comma
# characters, or a bare comma. Bare commas contribute no value, so empty
# unquoted fields do not appear in the result.
def parse_csv(text)
  field = /"([^\"\\]*(?:\\.[^\"\\]*)*)",?|([^,]+),?|,/
  text.scan(field).flatten.compact
end
+
+line = %q<XYZZY,"","O'Reilly, Inc","Wall, Larry","a \"glug\" bit,",5,"Error, Core Dumped">
+fields = parse_csv(line)
+fields.each_with_index { |v,i|
+ print "#{i} : #{v}\n";
+}
+
+
+# @@PLEAC@@_1.16
+# Use the soundex.rb Library from Michael Neumann.
+# http://www.s-direktnet.de/homepages/neumann/rb_prgs/Soundex.rb
+require 'Soundex'
+
+code = Text::Soundex.soundex(string)
+codes = Text::Soundex.soundex(array)
+
# Substitute for getpwent():
# reads /etc/passwd and returns an array of user entries, each a two-element
# array [username, full name] taken from the first and fifth colon-separated
# column. Lines starting with "#" are skipped.
def login_names
  entries = []
  File.foreach("/etc/passwd") do |record|
    next if record =~ /^#/
    fields = record.split(":")
    entries << [fields[0], fields[4]]
  end
  entries
end
+
puts "Lookup user: "
user = STDIN.gets
exit unless user # gets returns nil at end of input; check before chomp!
user.chomp!
name_code = Text::Soundex.soundex(user)

splitter = Regexp.new('(\w+)[^,]*\b(\w+)')
for username, fullname in login_names do
  # The full name column may be missing or may not contain two words.
  name_parts = fullname && splitter.match(fullname)
  firstname, lastname = name_parts ? name_parts[1, 2] : []
  candidates = [username, firstname, lastname].compact
  # Binary operators must end a line, not start the next one, for Ruby to
  # treat the following line as a continuation; any? avoids the issue.
  if candidates.any? { |candidate| name_code == Text::Soundex.soundex(candidate) }
    puts "#{username}: #{firstname} #{lastname}"
  end
end
+
+
+# @@PLEAC@@_1.17
+# @@INCLUDE@@ include/ruby/fixstyle.rb
+
+
+# @@PLEAC@@_1.18
+# @@INCLUDE@@ include/ruby/psgrep.rb
+
+
+# @@PLEAC@@_2.1
+# Matz tells that you can use Integer() for strict checked conversion.
+Integer("abc")
+#=> `Integer': invalid value for Integer: "abc" (ArgumentError)
+Integer("567")
+#=> 567
+
+# You may use Float() for floating point stuff
+Integer("56.7")
+#=> `Integer': invalid value for Integer: "56.7" (ArgumentError)
+Float("56.7")
+#=> 56.7
+
+# You may also use a regexp for that
+if string =~ /^[+-]?\d+$/
+ p 'is an integer'
+else
+ p 'is not'
+end
+
+if string =~ /^-?(?:\d+(?:\.\d*)?|\.\d+)$/
+ p 'is a decimal number'
+else
+ p 'is not'
+end
+
+
+# @@PLEAC@@_2.2
# equal(num1, num2, accuracy) : returns true if num1 and num2 produce the same
# text when formatted with "%.<accuracy>g", i.e. when they agree to
# accuracy significant digits
def equal(i, j, a)
  pattern = "%.#{a}g"
  format(pattern, i) == format(pattern, j)
end
+
+wage = 536 # $5.36/hour
+week = 40 * wage # $214.40
+printf("One week's wage is: \$%.2f\n", week/100.0)
+
+
+# @@PLEAC@@_2.3
+num.round # rounds to integer
+
+a = 0.255
+b = sprintf("%.2f", a)
+print "Unrounded: #{a}\nRounded: #{b}\n"
+printf "Unrounded: #{a}\nRounded: %.2f\n", a
+
+print "number\tint\tfloor\tceil\n"
+a = [ 3.3 , 3.5 , 3.7, -3.3 ]
+for n in a
+ printf("% .1f\t% .1f\t% .1f\t% .1f\n", # at least I don't fake my output :)
+ n, n.to_i, n.floor, n.ceil)
+end
+
+
+# @@PLEAC@@_2.4
# Returns the binary digits of n (packed as a 32-bit big-endian integer)
# as a string with leading zeros removed; zero yields "0".
def dec2bin(n)
  bits = [n].pack("N").unpack("B32").first
  bits.sub(/^0+(?=\d)/, '')
end
+
# Converts a string (or number) of binary digits to an integer.
# The text is left-padded with zeros to 32 digits; only the last 32 digits
# are used.
def bin2dec(n)
  padded = n.to_s.rjust(32, "0")
  [padded[-32..-1]].pack("B32").unpack("N").first
end
+
+
+# @@PLEAC@@_2.5
+for i in x .. y
+ # i is set to every integer from x to y, inclusive
+end
+
+x.step(y,7) { |i|
+ # i is set to every integer from x to y, stepsize = 7
+}
+
+print "Infancy is: "
+(0..2).each { |i|
+ print i, " "
+}
+print "\n"
+
+
+# @@PLEAC@@_2.6
# Conversion methods added to the Integer class, so that a roman numeral is
# just another representation of an ordinary number.
class Integer

  # Numerals with their values, largest first; the subtractive pairs
  # (CM, CD, XC, XL, IX, IV) are listed like ordinary symbols.
  @@romanlist = [["M", 1000],
                 ["CM", 900],
                 ["D", 500],
                 ["CD", 400],
                 ["C", 100],
                 ["XC", 90],
                 ["L", 50],
                 ["XL", 40],
                 ["X", 10],
                 ["IX", 9],
                 ["V", 5],
                 ["IV", 4],
                 ["I", 1]]

  # Returns the roman numeral for this integer. Greedily emits the largest
  # symbol that still fits; zero and negative numbers give "".
  def to_roman
    left = self
    numeral = ""
    @@romanlist.each do |glyph, value|
      until left < value
        numeral << glyph
        left -= value
      end
    end
    numeral
  end

  # Parses a roman numeral (case-insensitive) into an integer by repeatedly
  # stripping matching symbols from the front, largest first. Text that
  # matches no symbol is ignored.
  def self.from_roman(roman)
    rest = roman.upcase
    total = 0
    @@romanlist.each do |glyph, value|
      width = glyph.length
      while rest[0, width] == glyph
        total += value
        rest = rest[width..-1]
      end
    end
    total
  end

end
+
+
+roman_fifteen = 15.to_roman
+puts "Roman for fifteen is #{roman_fifteen}"
+i = Integer.from_roman(roman_fifteen)
+puts "Converted back, #{roman_fifteen} is #{i}"
+
+# check
+for i in (1..3900)
+ r = i.to_roman
+ j = Integer.from_roman(r)
+ if i != j
+ puts "error: #{i} : #{r} - #{j}"
+ end
+end
+
+
+# @@PLEAC@@_2.7
+random = rand(y-x+1)+x
+
+chars = ["A".."Z","a".."z","0".."9"].collect { |r| r.to_a }.join + %q(!@$%^&*)
+password = (1..8).collect { chars[rand(chars.size)] }.pack("C*")
+
+
+# @@PLEAC@@_2.8
+srand # uses a combination of the time, the process id, and a sequence number
+srand(val) # for repeatable behaviour
+
+
+# @@PLEAC@@_2.9
+# from the randomr lib:
+# http://raa.ruby-lang.org/project/randomr/
+ # ----> http://raa.ruby-lang.org/project/randomr/
+
+require 'random/mersenne_twister'
+mers = Random::MersenneTwister.new 123456789
+puts mers.rand(0) # 0.550321932544541
+puts mers.rand(10) # 2
+
+# using online sources of random data via the realrand package:
+# http://raa.ruby-lang.org/project/realrand/
+# **Note**
+# The following online services are used in this package:
+# http://www.random.org - source: atmospheric noise
+# http://www.fourmilab.ch/hotbits - source: radioactive decay timings
+# http://random.hd.org - source: entropy from local and network noise
+# Please visit the sites and respect the rules of each service.
+
+require 'random/online'
+
+generator1 = Random::RandomOrg.new
+puts generator1.randbyte(5).join(",")
+puts generator1.randnum(10, 1, 6).join(",") # Roll dice 10 times.
+
+generator2 = Random::FourmiLab.new
+puts generator2.randbyte(5).join(",")
+# randnum is not supported.
+
+generator3 = Random::EntropyPool.new
+puts generator3.randbyte(5).join(",")
+# randnum is not supported.
+
+
+# @@PLEAC@@_2.10
# Returns two independent, normally distributed random numbers (mean 0,
# standard deviation 1) using the polar Box-Muller method.
#
# Points are drawn from the square [-1, 1) x [-1, 1) until one lies strictly
# inside the unit circle. The origin (w == 0) is rejected as well, because
# log(0) / 0 would otherwise produce NaN.
def gaussian_rand
  u1 = u2 = w = 0.0
  loop do
    u1 = 2 * rand - 1
    u2 = 2 * rand - 1
    w = u1 * u1 + u2 * u2
    break if w > 0 && w < 1
  end
  scale = Math.sqrt((-2 * Math.log(w)) / w)
  [u2 * scale, u1 * scale]
end
+
+mean = 25
+sdev = 2
+salary = gaussian_rand[0] * sdev + mean
+printf("You have been hired at \$%.2f\n", salary)
+
+
+# @@PLEAC@@_2.11
# Converts an angle from degrees to radians.
def deg2rad(d)
  half_turns = d / 180.0
  half_turns * Math::PI
end
+
# Converts an angle from radians to degrees.
def rad2deg(r)
  half_turns = r / Math::PI
  half_turns * 180
end
+
+
+# @@PLEAC@@_2.12
+sin_val = Math.sin(angle)
+cos_val = Math.cos(angle)
+tan_val = Math.tan(angle)
+
# Older Ruby versions did not provide all inverse trigonometric and hyperbolic
# functions in Math. Each fallback below is defined only when the running Ruby
# has no native version, so a built-in implementation is never replaced by
# these simpler formulas (which lose precision near zero and overflow for
# large arguments).
module Math
  unless respond_to?(:asin)
    # Arc sine via atan2; defined for -1 <= x <= 1.
    def Math.asin(x)
      atan2(x, sqrt(1 - x**2))
    end
  end
  unless respond_to?(:acos)
    # Arc cosine via atan2; defined for -1 <= x <= 1.
    def Math.acos(x)
      atan2(sqrt(1 - x**2), x)
    end
  end
  unless respond_to?(:atan)
    # Arc tangent via atan2.
    def Math.atan(x)
      atan2(x, 1)
    end
  end
  unless respond_to?(:sinh)
    # Hyperbolic sine from its exponential definition.
    def Math.sinh(x)
      (exp(x) - exp(-x)) / 2
    end
  end
  unless respond_to?(:cosh)
    # Hyperbolic cosine from its exponential definition.
    def Math.cosh(x)
      (exp(x) + exp(-x)) / 2
    end
  end
  unless respond_to?(:tanh)
    # Hyperbolic tangent as sinh / cosh.
    def Math.tanh(x)
      sinh(x) / cosh(x)
    end
  end
end
+
+# The support for Complex numbers is not built-in
+y = Math.acos(3.7)
+#=> in `sqrt': square root for negative number (ArgumentError)
+
+# There is an implementation of Complex numbers in 'complex.rb' in current
+# Ruby distro, but it doesn't support atan2 with complex args, so it doesn't
+# solve this problem.
+
+
+# @@PLEAC@@_2.13
+log_e = Math.log(val)
+log_10 = Math.log10(val)
+
# Logarithm of val to an arbitrary base, computed as the quotient of the two
# natural logarithms.
def log_base(base, val)
  numerator = Math.log(val)
  denominator = Math.log(base)
  numerator / denominator
end
+
+answer = log_base(10, 10_000)
+puts "log10(10,000) = #{answer}"
+
+
+# @@PLEAC@@_2.14
+require 'matrix.rb'
+
+a = Matrix[[3, 2, 3], [5, 9, 8]]
+b = Matrix[[4, 7], [9, 3], [8, 1]]
+c = a * b
+
+a.row_size
+a.column_size
+
+c.det
+a.transpose
+
+
+# @@PLEAC@@_2.15
+require 'complex.rb'
+require 'rational.rb'
+
+a = Complex(3, 5) # 3 + 5i
+b = Complex(2, -2) # 2 - 2i
+puts "c = #{a*b}"
+
+c = a * b
+d = 3 + 4*Complex::I
+
+printf "sqrt(#{d}) = %s\n", Math.sqrt(d)
+
+
+# @@PLEAC@@_2.16
+number = hexadecimal.hex
+number = octal.oct
+
print "Gimme a number in decimal, octal, or hex: "
num = gets
exit unless num # gets returns nil at end of input; defined?(num) never fails
num = num.chomp
num = num.oct if num =~ /^0/ # does both oct and hex
printf "%d %x %o\n", num, num, num

print "Enter file permission in octal: "
permissions = gets
raise "Exiting ...\n" unless permissions # nil means no more input
permissions = permissions.chomp
puts "The decimal value is #{permissions.oct}"
+
+
+# @@PLEAC@@_2.17
# Returns n as a string with a comma after every third digit of the integer
# part; anything from the first "." on is appended untouched.
# The integer part is reversed so the digits can be grouped from the right,
# one comma per substitution pass, and then reversed back.
def commify(n)
  whole, fraction = n.to_s.match(/([^\.]*)(\..*)?/).captures
  digits = whole.reverse
  loop do
    break unless digits.gsub!(/(,|\.|^)(\d{3})(\d)/, '\1\2,\3')
  end
  digits.reverse + fraction.to_s
end
+
+
+# @@PLEAC@@_2.18
+printf "It took %d hour%s\n", time, time == 1 ? "" : "s"
+
+# dunno if an equivalent to Lingua::EN::Inflect exists...
+
+
+# @@PLEAC@@_2.19
+#-----------------------------
+#!/usr/bin/ruby
+# bigfact - calculating prime factors
# Computes the prime factorisation of orig by trial division.
# Returns a hash (default value 0) mapping each prime factor to its exponent.
# The hash is empty when no proper factor exists, e.g. for primes and for 1.
def factorize(orig)
  counts = Hash.new(0) # missing keys read as 0, so += 1 works directly
  rest = orig
  divisor = 2
  square = 4 # always divisor ** 2
  while square <= rest
    while (rest % divisor).zero?
      rest /= divisor
      counts[divisor] += 1
    end
    # (d + 1)**2 == d**2 + 2*d + 1, so the square is updated without multiplying
    square += 2 * divisor + 1
    divisor += 1
  end

  # whatever remains is a prime factor, unless nothing was divided out at all
  counts[rest] += 1 unless rest == 1 || rest == orig
  counts
end
+
# Prints one result line: the number left-justified in ten columns, followed
# by "PRIME" if the factor hash is empty, or else by the factors in ascending
# order, each as "factor" or "factor**exponent" plus a trailing space.
def printfactorhash(orig, factorcount)
  line = format("%-10d ", orig)
  if factorcount.empty?
    line << "PRIME"
  else
    # Hash#sort orders by key, and the keys are the numeric factors
    factorcount.sort.each do |factor, exponent|
      line << factor.to_s
      line << "**" << exponent.to_s if exponent > 1
      line << " "
    end
  end
  print line
  puts
end
+
+for arg in ARGV
+ n = arg.to_i
+ mfactors = factorize(n)
+ printfactorhash(n, mfactors)
+end
+#-----------------------------
+
+
+# @@PLEAC@@_3.0
+puts Time.now
+
+print "Today is day ", Time.now.yday, " of the current year.\n"
+print "Today is day ", Time.now.day, " of the current month.\n"
+
+
+# @@PLEAC@@_3.1
+day, month, year = Time.now.day, Time.now.month, Time.now.year
+# or
+day, month, year = Time.now.to_a[3..5]
+
+tl = Time.now.localtime
+printf("The current date is %04d %02d %02d\n", tl.year, tl.month, tl.day)
+
+Time.now.localtime.strftime("%Y-%m-%d")
+
+
+# @@PLEAC@@_3.2
+Time.local(year, month, day, hour, minute, second).tv_sec
+Time.gm(year, month, day, hour, minute, second).tv_sec
+
+
+# @@PLEAC@@_3.3
+sec, min, hour, day, month, year, wday, yday, isdst, zone = Time.at(epoch_secs).to_a
+
+
+# @@PLEAC@@_3.4
+when_ = now + difference # now -> Time ; difference -> Numeric (delta in seconds)
+then_ = now - difference
+
+
+# @@PLEAC@@_3.5
+bree = 361535725
+nat = 96201950
+
+difference = bree - nat
+puts "There were #{difference} seconds between Nat and Bree"
+
+seconds = difference % 60
+difference = (difference - seconds) / 60
+minutes = difference % 60
+difference = (difference - minutes) / 60
+hours = difference % 24
+difference = (difference - hours) / 24
+days = difference % 7
+weeks = (difference - days) / 7
+
+puts "(#{weeks} weeks, #{days} days, #{hours}:#{minutes}:#{seconds})"
+
+
+# @@PLEAC@@_3.6
+monthday, weekday, yearday = date.mday, date.wday, date.yday
+
+# AFAIK the week number is not just a division since week boundaries are on sundays
+weeknum = d.strftime("%U").to_i + 1
+
+year = 1981
+month = "jun" # or `6' if you want to emulate a broken language
+day = 16
+t = Time.mktime(year, month, day)
+print "#{month}/#{day}/#{year} was a ", t.strftime("%A"), "\n"
+
+
+# @@PLEAC@@_3.7
+yyyy, mm, dd = $1, $2, $3 if "1998-06-25" =~ /(\d+)-(\d+)-(\d+)/
+
+epoch_seconds = Time.mktime(yyyy, mm, dd).tv_sec
+
+# dunno an equivalent to Date::Manip#ParseDate
+
+
+# @@PLEAC@@_3.8
+string = Time.at(epoch_secs)
+Time.at(1234567890).gmtime # gives: Fri Feb 13 23:31:30 UTC 2009
+
+time = Time.mktime(1973, "jan", 18, 3, 45, 50)
+print "In localtime it gives: ", time.localtime, "\n"
+
+
+# @@PLEAC@@_3.9
+# Ruby provides micro-seconds in Time object
+Time.now.usec
+
+ # Ruby gives the seconds as a floating-point number when subtracting two Time objects
+before = Time.now
+line = gets
+elapsed = Time.now - before
+puts "You took #{elapsed} seconds."
+
+# On my Celeron-400 with Linux-2.2.19-14mdk, average for three execs are:
+# This Ruby version: average 0.00321 sec
+# Cookbook's Perl version: average 0.00981 sec
+size = 500
+number_of_times = 100
+total_time = 0
+number_of_times.times {
+ # populate array
+ array = []
+ size.times { array << rand }
+ # sort it
+ begin_ = Time.now
+ array.sort!
+ time = Time.now - begin_
+ total_time += time
+}
+printf "On average, sorting %d random numbers takes %.5f seconds\n",
+ size, (total_time/Float(number_of_times))
+
+
+# @@PLEAC@@_3.10
+sleep(0.005) # Ruby is definitely not as broken as Perl :)
+# (may be interrupted by sending the process a SIGALRM)
+
+
+# @@PLEAC@@_3.11
+#!/usr/bin/ruby -w
+# hopdelta - feed mail header, produce lines
+# showing delay at each hop.
+require 'time'
# Reads a mail header and prints one line per delivery hop showing sender,
# recipient, arrival time and the delay since the previous hop.
class MailHopDelta

  # mail is the raw header text. Folded header lines are joined; the sender
  # domain is taken from the From line and the start time from the Date line.
  # Raises NoMethodError if either line is missing.
  def initialize(mail)
    @head = mail.gsub(/\n\s+/,' ')
    @topline = %w-Sender Recipient Time Delta-
    @start_from = mail.match(/^From.*\@([^\s>]*)/)[1]
    @date = Time.parse(mail.match(/^Date:\s+(.*)/)[1])
  end

  # Formats four values as one output row; the first three columns are padded
  # or truncated to 20 characters.
  def out(line)
    "%-20.20s %-20.20s %-20.20s %s" % line
  end

  # Formats a Time for the "Time" column.
  def hop_date(day)
    day.strftime("%I:%M:%S %Y/%m/%d")
  end

  # Prints the table: a heading, the start row, then one row per Received:
  # header in chronological order (the headers are stored newest first).
  # Received lines without a timestamp, or with one that cannot be parsed,
  # are reported with a warning and skipped.
  def puts_hops
    puts out(@topline)
    puts out(['Start', @start_from, hop_date(@date),''])
    @head.split(/\n/).reverse.grep(/^Received:/).each do |hop|
      hop.gsub!(/\bon (.*?) (id.*)/,'; \1')
      # check the match itself: indexing a failed match would raise
      stamp = hop.match(/;\s+(.*)$/)
      unless stamp
        warn "Bad received line: #{hop}"
        next
      end
      whence = stamp[1]
      from = $+ if hop =~ /from\s+(\S+)|\((.*?)\)/
      by = $1 if hop =~ /by\s+(\S+\.\S+)/
      begin
        now = Time.parse(whence).localtime
      rescue ArgumentError
        # Time.parse raises instead of returning nil for unusable input
        warn "Bad received line: #{hop}"
        next
      end
      delta = now - @date
      puts out([from, by, hop_date(now), hop_time(delta)])
      @date = now
    end
  end

  # Renders a duration in seconds as e.g. "  5s  2m  1h", smallest unit first;
  # minutes, hours and days are left out when zero. Negative durations carry
  # the sign on every unit.
  def hop_time(secs)
    sign = secs < 0 ? -1 : 1
    days, secs = secs.abs.divmod(60 * 60 * 24)
    hours,secs = secs.abs.divmod(60 * 60)
    mins, secs = secs.abs.divmod(60)
    rtn = "%3ds" % [secs * sign]
    rtn << "%3dm" % [mins * sign] if mins != 0
    rtn << "%3dh" % [hours * sign] if hours != 0
    rtn << "%3dd" % [days * sign] if days != 0
    rtn
  end
end
+
+$/ = ""
+mail = MailHopDelta.new(ARGF.gets).puts_hops
+
+
+# @@PLEAC@@_4.0
+single_level = [ "this", "that", "the", "other" ]
+
+# Ruby directly supports nested arrays
+double_level = [ "this", "that", [ "the", "other" ] ]
+still_single_level = [ "this", "that", [ "the", "other" ] ].flatten
+
+
+# @@PLEAC@@_4.1
+a = [ "quick", "brown", "fox" ]
+a = %w(Why are you teasing me?)
+
+lines = <<"END_OF_HERE_DOC".gsub(/^\s*(.+)/, '\1')
+ The boy stood on the burning deck,
+ It was as hot as glass.
+END_OF_HERE_DOC
+
+bigarray = IO.readlines("mydatafile").collect { |l| l.chomp }
+
+name = "Gandalf"
+banner = %Q(Speak, #{name}, and welcome!)
+
+host_info = `host #{his_host}`
+
+%x(ps #{$$})
+
+banner = 'Costs only $4.95'.split(' ')
+
+rax = %w! ( ) < > { } [ ] !
+
+
+# @@PLEAC@@_4.2
# Joins the items of a into an English series:
# "" for none, the item itself for one, "x and y" for two,
# and "x, y, and z" for three or more.
def commify_series(a)
  case a.size
  when 0 then ''
  when 1 then a[0]
  when 2 then a.join(' and ')
  else a[0..-2].join(', ') + ', and ' + a[-1]
  end
end
+
+array = [ "red", "yellow", "green" ]
+
+print "I have ", array, " marbles\n"
+# -> I have redyellowgreen marbles
+
+# But unlike Perl:
+print "I have #{array} marbles\n"
+# -> I have redyellowgreen marbles
+# So, needs:
+print "I have #{array.join(' ')} marbles\n"
+# -> I have red yellow green marbles
+
# Joins the items of a into an English series, like "x, y, and z".
# If any item itself contains a comma, "; " is used as the separator between
# three or more items. None, one and two items yield "", the item itself and
# "x and y" respectively.
def commify_series(a)
  with_commas = a.select { |item| item =~ /,/ }
  sepchar = with_commas.empty? ? ', ' : '; '
  case a.size
  when 0 then ''
  when 1 then a[0]
  when 2 then a.join(' and ')
  else a[0..-2].join(sepchar) + sepchar + 'and ' + a[-1]
  end
end
+
+
+# @@PLEAC@@_4.3
+# (note: AFAIK Ruby doesn't allow gory change of Array length)
+# grow the array by assigning nil to past the end of array
+ary[new_size-1] = nil
+# shrink the array by slicing it down
+ary.slice!(new_size..-1)
+# init the array with given size
+Array.new(number_of_elems)
+# assign to an element past the original end enlarges the array
+ary[index_new_last_elem] = value
+
# Prints the number of elements of a and the element at index 3.
def what_about_that_array(a)
  puts "The array now has #{a.size} elements."
  # Index of last element is not really interesting in Ruby
  puts "Element #3 is `#{a[3]}'."
end
+people = %w(Crosby Stills Nash Young)
+what_about_that_array(people)
+
+
+# @@PLEAC@@_4.4
+# OO style
+bad_users.each { |user|
+ complain(user)
+}
+# or, functional style
+for user in bad_users
+ complain(user)
+end
+
+for var in ENV.keys.sort
+ puts "#{var}=#{ENV[var]}"
+end
+
+for user in all_users
+ disk_space = get_usage(user)
+ if (disk_space > MAX_QUOTA)
+ complain(user)
+ end
+end
+
+for l in IO.popen("who").readlines
+ print l if l =~ /^gc/
+end
+
+# we can mimic the obfuscated Perl way
+while fh.gets # $_ is set to the line just read
+ chomp # $_ has a trailing \n removed, if it had one
+ split.each { |w| # $_ is split on whitespace
+ # but $_ is not set to each chunk as in Perl
+ print w.reverse
+ }
+end
+# ...or use a cleaner way
+for l in fh.readlines
+ l.chomp.split.each { |w| print w.reverse }
+end
+
+# same drawback as in problem 1.4, we can't mutate a Numeric...
+array.collect! { |v| v - 1 }
+
+a = [ .5, 3 ]; b = [ 0, 1 ]
+for ary in [ a, b ]
+ ary.collect! { |v| v * 7 }
+end
+puts "#{a.join(' ')} #{b.join(' ')}"
+
+# we can mutate Strings, cool; we need a trick for the scalar
+for ary in [ [ scalar ], array, hash.values ]
+ ary.each { |v| v.strip! } # String#strip rules :)
+end
+
+
+# @@PLEAC@@_4.5
+# not relevant in Ruby since we have always references
+for item in array
+ # do something with item
+end
+
+
+# @@PLEAC@@_4.6
+unique = list.uniq
+
+# generate a list of users logged in, removing duplicates
+users = `who`.collect { |l| l =~ /(\w+)/; $1 }.sort.uniq
+puts("users logged in: #{commify_series(users)}") # see 4.2 for commify_series
+
+
+# @@PLEAC@@_4.7
+a - b
+# [ 1, 1, 2, 2, 3, 3, 3, 4, 5 ] - [ 1, 2, 4 ] -> [3, 5]
+
+
+# @@PLEAC@@_4.8
+union = a | b
+intersection = a & b
+difference = a - b
+
+
+# @@PLEAC@@_4.9
+array1.concat(array2)
+# if you will assign to another object, better use:
+new_ary = array1 + array2
+
+members = [ "Time", "Flies" ]
+initiates = [ "An", "Arrow" ]
+members += initiates
+
+members = [ "Time", "Flies" ]
+initiates = [ "An", "Arrow" ]
+members[2,0] = [ "Like", initiates ].flatten
+
+members[0] = "Fruit"
+members[3,2] = "A", "Banana"
+
+
+# @@PLEAC@@_4.10
+reversed = ary.reverse
+
+ary.reverse_each { |e|
+ # do something with e
+}
+
+descending = ary.sort.reverse
+descending = ary.sort { |a,b| b <=> a }
+
+
+# @@PLEAC@@_4.11
+# remove n elements from front of ary (shift n)
+front = ary.slice!(0, n)
+
+# remove n elements from the end of ary (pop n)
+end_ = ary.slice!(-n .. -1)
+
# Extension of the Array class with two-element variants of shift and pop.
class Array
  # Removes and returns the first two elements (fewer if the array is
  # shorter). Written with slice! to mirror pop2.
  def shift2()
    slice!(0, 2)
  end

  # Removes and returns the last two elements; returns nil and leaves the
  # array untouched if it has fewer than two elements.
  def pop2()
    slice!(-2, 2)
  end
end
+
+friends = %w(Peter Paul Mary Jim Tim)
+this, that = friends.shift2
+
+beverages = %w(Dew Jolt Cola Sprite Fresca)
+pair = beverages.pop2
+
+
+# @@PLEAC@@_4.12
+# use Enumerable#detect (or the synonym Enumerable#find)
+highest_eng = employees.detect { |emp| emp.category == 'engineer' }
+
+
+# @@PLEAC@@_4.13
+# use Enumerable#select (or the synonym Enumerable#find_all)
+bigs = nums.select { |i| i > 1_000_000 }
+pigs = users.keys.select { |k| users[k] > 1e7 }
+
+matching = `who`.select { |u| u =~ /^gnat / }
+
+engineers = employees.select { |e| e.position == 'Engineer' }
+
+secondary_assistance = applicants.select { |a|
+ a.income >= 26_000 && a.income < 30_000
+}
+
+
+# @@PLEAC@@_4.14
+# normally you would have an array of Numeric (Float or
+# Fixnum or Bignum), so you would use:
+sorted = unsorted.sort
+# if you have strings representing Integers or Floats
+# you may specify another sort method:
+sorted = unsorted.sort { |a,b| a.to_f <=> b.to_f }
+
+# let's use the list of my own PID's
+`ps ux`.split("\n")[1..-1].
+ select { |i| i =~ /^#{ENV['USER']}/ }.
+ collect { |i| i.split[1] }.
+ sort { |a,b| a.to_i <=> b.to_i }.each { |i| puts i }
+puts "Select a process ID to kill:"
+pid = gets.chomp
+raise "Exiting ... \n" unless pid && pid =~ /^\d+$/
+Process.kill('TERM', pid.to_i)
+sleep 2
+Process.kill('KILL', pid.to_i)
+
+descending = unsorted.sort { |a,b| b.to_f <=> a.to_f }
+
+
+# @@PLEAC@@_4.15
+ordered = unordered.sort { |a,b| compare(a,b) }
+
+precomputed = unordered.collect { |e| [compute, e] }
+ordered_precomputed = precomputed.sort { |a,b| a[0] <=> b[0] }
+ordered = ordered_precomputed.collect { |e| e[1] }
+
+ordered = unordered.collect { |e| [compute, e] }.
+ sort { |a,b| a[0] <=> b[0] }.
+ collect { |e| e[1] }
+
+for employee in employees.sort { |a,b| a.name <=> b.name }
+ print employee.name, " earns \$ ", employee.salary, "\n"
+end
+
+# Beware! `0' is true in Ruby.
+# For chaining comparisons, you may use Numeric#nonzero?, which
+# returns num if num is not zero, nil otherwise
+sorted = employees.sort { |a,b| (a.name <=> b.name).nonzero? || b.age <=> a.age }
+
+users = []
+# getpwent is not wrapped in Ruby... let's fallback
+IO.readlines('/etc/passwd').each { |u| users << u.split(':') }
+users.sort! { |a,b| a[0] <=> b[0] }
+for user in users
+ puts user[0]
+end
+
+sorted = names.sort { |a,b| a[1, 1] <=> b[1, 1] }
+sorted = strings.sort { |a,b| a.length <=> b.length }
+
+# let's show only the compact version
+ordered = strings.collect { |e| [e.length, e] }.
+ sort { |a,b| a[0] <=> b[0] }.
+ collect { |e| e[1] }
+
+ordered = strings.collect { |e| [/\d+/.match(e)[0].to_i, e] }.
+ sort { |a,b| a[0] <=> b[0] }.
+ collect { |e| e[1] }
+
+print `cat /etc/passwd`.collect { |e| [e, e.split(':').indexes(3,2,0)].flatten }.
+ sort { |a,b| (a[1] <=> b[1]).nonzero? || (a[2] <=> b[2]).nonzero? || a[3] <=> b[3] }.
+ collect { |e| e[0] }
+
+
+# @@PLEAC@@_4.16
+circular.unshift(circular.pop) # the last shall be first
+circular.push(circular.shift) # and vice versa
+
# Moves the first element of l to the end (in place) and returns it.
def grab_and_rotate(l)
  head = l.shift
  l << head
  head
end
+
+processes = [1, 2, 3, 4, 5]
+while (1)
+ process = grab_and_rotate(processes)
+ puts "Handling process #{process}"
+ sleep 1
+end
+
+
+# @@PLEAC@@_4.17
# Shuffles a in place with the Fisher-Yates algorithm: walking from the last
# index down to 1, each element is swapped with a randomly chosen element at
# or below its own position.
def fisher_yates_shuffle(a)
  last = a.size - 1
  last.downto(1) do |i|
    j = rand(i + 1)
    next if i == j
    a[i], a[j] = a[j], a[i]
  end
end
+
# Shuffles a in place the naive way: every position is swapped with a
# position chosen at random from the whole array.
def naive_shuffle(a)
  (0...a.size).each do |i|
    j = rand(a.size)
    held = a[i]
    a[i] = a[j]
    a[j] = held
  end
end
+
+
+# @@PLEAC@@_4.18
+#!/usr/bin/env ruby
+# example 4-2 words
+ # words - gather lines, present in columns
+
# Encapsulates formatting a list of words into columns for a display that is
# a given number of characters wide.
class WordFormatter
  # cols is the available line width in characters.
  def initialize(cols)
    @cols = cols
  end

  # Returns the length of the longest word in the wordlist, but at least 1.
  def maxlen(wordlist)
    wordlist.inject(1) do |longest, word|
      word.length > longest ? word.length : longest
    end
  end

  # Prints the wordlist formatted into columns. The words run down each
  # column first. Every cell except the last one of a row is padded to the
  # column width; unused cells are printed as empty strings.
  def output(wordlist)
    width = maxlen(wordlist) + 1
    per_row = @cols / width
    per_row = 1 if per_row == 0
    rows = (wordlist.length + per_row - 1) / per_row
    # visit the cells row by row and pick the word that belongs at each one
    0.upto(rows * per_row - 1) do |slot|
      row, column = slot.divmod(per_row)
      cell = wordlist[column * rows + row] || ""
      if column == per_row - 1
        # last cell of the row: no padding, finish the line
        print cell
        puts
      else
        print cell.ljust(width)
      end
    end
    # nothing to finish up: the last cell visited always ends its row
  end
end
+
# Returns the number of characters that fit on one line of the window or
# console attached to $stdout, see PLEAC 15.4. Falls back to 80 when the
# width cannot be determined (not a terminal, ioctl unsupported, width 0).
# Not portable: 0x5413 is presumably the Linux TIOCGWINSZ request number.
def getWinCharWidth()
  buf = "\0" * 8
  $stdout.ioctl(0x5413, buf)
  # The buffer is read as four native unsigned shorts ("S4"), in the order
  # rows, columns, x pixels, y pixels. Only the column count is needed.
  _ws_row, ws_col, _ws_xpixel, _ws_ypixel = buf.unpack("S4")
  ws_col && ws_col > 0 ? ws_col : 80
rescue
  80
end
+
+# main program
+cols = getWinCharWidth()
+formatter = WordFormatter.new(cols)
+words = readlines()
+words.collect! { |line|
+ line.chomp
+}
+formatter.output(words)
+
+
+# @@PLEAC@@_4.19
# Ruby integers grow to arbitrary size automatically, so no extra
# big-number module is needed to compute large factorials.
# Returns n! (1 for n <= 0).
def factorial(n)
  product = 1
  while 0 < n
    product *= n
    n -= 1
  end
  product
end
+
+puts factorial(500)
+
+#---------------------------------------------------------
+# Example 4-3. tsc-permute
+# tsc_permute: permute each word of input
+def permute(items, perms)
+ unless items.length > 0
+ puts perms.join(" ")
+ else
+ for i in items
+ newitems = items.dup
+ newperms = perms.dup
+ newperms.unshift(newitems.delete(i))
+ permute(newitems, newperms)
+ end
+ end
+end
+# In ruby the main program must be after all definitions it is using
+permute(ARGV, [])
+
+#---------------------------------------------------------
+# mjd_permute: permute each word of input
+
+def factorial(n)
+ s = 1
+ while n > 0
+ s *= n
+ n -= 1
+ end
+ s
+end
+
+# we use a class with a class variable store the private cache
+# for the results of the factorial function.
+class Factorial
+ @@fact = [ 1 ]
+ def Factorial.compute(n)
+ if @@fact[n]
+ @@fact[n]
+ else
+ @@fact[n] = n * Factorial.compute(n - 1)
+ end
+ end
+end
+
+#---------------------------------------------------------
+# Example 4-4- mjd-permute
+# n2pat(n, len): produce the N-th pattern of length len
+
+# We must use a lower case letter as parameter N, otherwise it is
+# handled as constant Length is the length of the resulting
+# array, not the index of the last element (length -1) like in
+# the perl example.
+def n2pat(n, length)
+ pat = []
+ i = 1
+ while i <= length
+ pat.push(n % i)
+ n /= i
+ i += 1
+ end
+ pat
+end
+
+# pat2perm(pat): turn pattern returned by n2pat() into
+# permutation of integers.
+def pat2perm(pat)
+ source = (0 .. pat.length - 1).to_a
+ perm = []
+ perm.push(source.slice!(pat.pop)) while pat.length > 0
+ perm
+end
+
+def n2perm(n, len)
+ pat2perm(n2pat(n,len))
+end
+
+# In ruby the main program must be after all definitions
+while gets
+ data = split
+ # the perl solution has used $#data, which is length-1
+ num_permutations = Factorial.compute(data.length())
+ 0.upto(num_permutations - 1) do |i|
+ # in ruby we can not use an array as selector for an array
+ # but by exchanging the two arrays, we can use the collect method
+ # which returns an array with the result of all block invocations
+ permutation = n2perm(i, data.length).collect {
+ |j| data[j]
+ }
+ puts permutation.join(" ")
+ end
+end
+
+
+# @@PLEAC@@_5.0
+age = { "Nat", 24,
+ "Jules", 25,
+ "Josh", 17 }
+
+age["Nat"] = 24
+age["Jules"] = 25
+age["Josh"] = 17
+
+food_color = {
+ "Apple" => "red",
+ "Banana" => "yellow",
+ "Lemon" => "yellow",
+ "Carrot" => "orange"
+ }
+
+# In Ruby, you cannot avoid the double or simple quoting
+# while manipulating hashes
+
+
+# @@PLEAC@@_5.1
+hash[key] = value
+
+food_color["Raspberry"] = "pink"
+puts "Known foods:", food_color.keys
+
+
+# @@PLEAC@@_5.2
+# does hash have a value for key ?
+if (hash.has_key?(key))
+ # it exists
+else
+ # it doesn't
+end
+
+[ "Banana", "Martini" ].each { |name|
+ print name, " is a ", food_color.has_key?(name) ? "food" : "drink", "\n"
+}
+
+age = {}
+age['Toddler'] = 3
+age['Unborn'] = 0
+age['Phantasm'] = nil
+
+for thing in ['Toddler', 'Unborn', 'Phantasm', 'Relic']
+ print "#{thing}: "
+ print "Has-key " if age.has_key?(thing)
+ print "True " if age[thing]
+ print "Nonzero " if age[thing] && age[thing].nonzero?
+ print "\n"
+end
+
+#=>
+# Toddler: Has-key True Nonzero
+# Unborn: Has-key True
+# Phantasm: Has-key
+# Relic:
+
+# You use Hash#has_key? when you use Perl's exists -> it checks
+# for existence of a key in a hash.
+# All Numeric are "True" in ruby, so the test doesn't have the
+# same semantics as in Perl; you would use Numeric#nonzero? to
+# achieve the same semantics (false if 0, true otherwise).
+
+
+# @@PLEAC@@_5.3
+food_color.delete("Banana")
+
+
+# @@PLEAC@@_5.4
+hash.each { |key, value|
+ # do something with key and value
+}
+
+hash.each_key { |key|
+ # do something with key
+}
+
+food_color.each { |food, color|
+ puts "#{food} is #{color}"
+}
+
+food_color.each_key { |food|
+ puts "#{food} is #{food_color[food]}"
+}
+
+# IMO this demonstrates that OO style is by far more readable
+food_color.keys.sort.each { |food|
+ puts "#{food} is #{food_color[food]}."
+}
+
+#-----------------------------
+#!/usr/bin/ruby
+# countfrom - count number of messages from each sender
+
+# Default value is 0
+from = Hash.new(0)
+while gets
+ /^From: (.*)/ and from[$1] += 1
+end
+
+# More useful to sort by number of received mail by person
+from.sort {|a,b| b[1]<=>a[1]}.each { |v|
+ puts "#{v[1]}: #{v[0]}"
+}
+#-----------------------------
+
+
+# @@PLEAC@@_5.5
+# You may use the built-in 'inspect' method this way:
+p hash
+
+# Or do it the Cookbook way:
+hash.each { |k,v| puts "#{k} => #{v}" }
+
+# Sorted by keys
+hash.sort.each { |e| puts "#{e[0]} => #{e[1]}" }
+# Sorted by values
+hash.sort{|a,b| a[1]<=>b[1]}.each { |e| puts "#{e[0]} => #{e[1]}" }
+
+
+# @@PLEAC@@_5.7
+ttys = Hash.new
+for i in `who`
+ user, tty = i.split
+ (ttys[user] ||= []) << tty # see problems_ruby for more infos
+end
+ttys.keys.sort.each { |k|
+ puts "#{k}: #{commify_series(ttys[k])}" # from 4.2
+}
+
+
+# @@PLEAC@@_5.8
+surname = { "Mickey" => "Mantle", "Babe" => "Ruth" }
+# Hash#key returns the first key stored with the given value (nil if none);
+# it replaces the removed Hash#index
+puts surname.key("Mantle")
+
+# If you really needed to 'invert' the whole hash, use Hash#invert
+
+#-----------------------------
+#!/usr/bin/ruby -w
+# foodfind - find match for food or color
+
+given = ARGV.shift or raise "usage: foodfind food_or_color"
+
+color = {
+ "Apple" => "red",
+ "Banana" => "yellow",
+ "Lemon" => "yellow",
+ "Carrot" => "orange",
+}
+
+if (color.has_key?(given))
+ puts "#{given} is a food with color #{color[given]}."
+end
+if (color.has_value?(given))
+ puts "#{color.index(given)} is a food with color #{given}."
+end
+#-----------------------------
+
+
+# @@PLEAC@@_5.9
+# Sorted by keys (Hash#sort gives an Array of pairs made of each key,value)
+food_color.sort.each { |f|
+ puts "#{f[0]} is #{f[1]}."
+}
+
+# Sorted by values
+food_color.sort { |a,b| a[1] <=> b[1] }.each { |f|
+ puts "#{f[0]} is #{f[1]}."
+}
+
+# Sorted by length of values
+food_color.sort { |a,b| a[1].length <=> b[1].length }.each { |f|
+ puts "#{f[0]} is #{f[1]}."
+}
+
+
+# @@PLEAC@@_5.10
+merged = a.clone.update(b) # because Hash#update changes object in place
+
+drink_color = { "Galliano" => "yellow", "Mai Tai" => "blue" }
+ingested_color = drink_color.clone.update(food_color)
+
+substance_color = {}
+for i in [ food_color, drink_color ]
+ i.each_key { |k|
+ if substance_color.has_key?(k)
+ puts "Warning: #{k} seen twice. Using the first definition."
+ next
+ end
+ substance_color[k] = 1
+ }
+end
+
+
+# @@PLEAC@@_5.11
+common = hash1.keys & hash2.keys
+
+this_not_that = hash1.keys - hash2.keys
+
+
+# @@PLEAC@@_5.12
+# no problem here, Ruby handles any kind of object for key-ing
+# (it takes Object#hash, which defaults to Object#id)
+
+
+# @@PLEAC@@_5.13
+# AFAIK, not possible in Ruby
+
+
+# @@PLEAC@@_5.14
+# Be careful, the following is possible only because Fixnum objects are
+# special (documentation says: there is effectively only one Fixnum object
+# instance for any given integer value).
+count = Hash.new(0)
+array.each { |e|
+ count[e] += 1
+}
+
+
+# @@PLEAC@@_5.15
+father = {
+ "Cain" , "Adam",
+ "Abel" , "Adam",
+ "Seth" , "Adam",
+ "Enoch" , "Cain",
+ "Irad" , "Enoch",
+ "Mehujael" , "Irad",
+ "Methusael" , "Mehujael",
+ "Lamech" , "Methusael",
+ "Jabal" , "Lamech",
+ "Jubal" , "Lamech",
+ "Tubalcain" , "Lamech",
+ "Enos" , "Seth",
+}
+
+while gets
+ chomp
+ begin
+ print $_, " "
+ end while $_ = father[$_]
+ puts
+end
+
+children = {}
+father.each { |k,v|
+ (children[v] ||= []) << k
+}
+while gets
+ chomp
+ puts "#{$_} begat #{(children[$_] || ['Nobody']).join(', ')}.\n"
+end
+
+includes = {}
+files.each { |f|
+ begin
+ for l in IO.readlines(f)
+ next unless l =~ /^\s*#\s*include\s*<([^>]+)>/
+ (includes[$1] ||= []) << f
+ end
+ rescue SystemCallError
+ $stderr.puts "#$! (skipping)"
+ end
+}
+
+include_free = includes.values.flatten.uniq - includes.keys
+
+
+# @@PLEAC@@_5.16
+# dutree - print sorted indented rendition of du output
+#% dutree
+#% dutree /usr
+#% dutree -a
+#% dutree -a /bin
+
+# The DuNode class collects all information about a directory,
+# and provides some convenience methods
+class DuNode
+
+ attr_reader :name
+ attr_accessor :size
+ attr_accessor :kids
+
+ def initialize(name)
+ @name = name
+ @kids = []
+ @size = 0
+ end
+
+ # support for sorting nodes with side
+ def size_compare(node2)
+ @size <=> node2.size
+ end
+
+ def basename
+ @name.sub(/.*\//, "")
+ end
+
+ #returns substring before last "/", nil if not there
+ def parent
+ p = @name.sub(/\/[^\/]+$/,"")
+ if p == @name
+ nil
+ else
+ p
+ end
+ end
+
+end
+
+# The Dutree class does the actual work of
+# getting the input, parsing it, building up a tree
+# and formatting it for output
+class Dutree
+
+ attr_reader :topdir
+
+ def initialize
+ @nodes = Hash.new
+ @dirsizes = Hash.new(0)
+ @kids = Hash.new([])
+ end
+
+ # get a node by name, create it if it does not exist yet
+ def get_create_node(name)
+ if @nodes.has_key?(name)
+ @nodes[name]
+ else
+ node = DuNode.new(name)
+ @nodes[name] = node
+ node
+ end
+ end
+
+ # run du, read in input, save sizes and kids
+ # stores last directory read in instance variable topdir
+ def input(arguments)
+ name = ""
+ cmd = "du " + arguments.join(" ")
+ IO.popen(cmd) { |pipe|
+ pipe.each { |line|
+ size, name = line.chomp.split(/\s+/, 2)
+ node = get_create_node(name)
+ node.size = size.to_i
+ @nodes[name] = node
+ parent = node.parent
+ if parent
+ get_create_node(parent).kids.push(node)
+ end
+ }
+ }
+ @topdir = @nodes[name]
+ end
+
+ # figure out how much is taken in each directory
+ # that isn't stored in the subdirectories. Add a new
+ # fake kid called "." containing that much.
+ def get_dots(node)
+ cursize = node.size
+ for kid in node.kids
+ cursize -= kid.size
+ get_dots(kid)
+ end
+ if node.size != cursize
+ newnode = get_create_node(node.name + "/.")
+ newnode.size = cursize
+ node.kids.push(newnode)
+ end
+ end
+
+ # recursively output everything
+ # passing padding and number width as well
+ # on recursive calls
+ def output(node, prefix="", width=0)
+ line = sprintf("%#{width}d %s", node.size, node.basename)
+ puts(prefix + line)
+ prefix += line.sub(/\d /, "| ")
+ prefix.gsub!(/[^|]/, " ")
+ if node.kids.length > 0 # not a bachelor node
+ kids = node.kids
+ kids.sort! { |a,b|
+ b.size_compare(a)
+ }
+ width = kids[0].size.to_s.length
+ for kid in kids
+ output(kid, prefix, width)
+ end
+ end
+ end
+
+end
+
+tree = Dutree.new
+tree.input(ARGV)
+tree.get_dots(tree.topdir)
+tree.output(tree.topdir)
+
+
+# @@PLEAC@@_6.0
+# The verbose version are match, sub, gsub, sub! and gsub!;
+# pattern needs to be a Regexp object; it yields a MatchData
+# object.
+pattern.match(string)
+string.sub(pattern, replacement)
+string.gsub(pattern, replacement)
+# As usual in Ruby, sub! does the same as sub but also modifies
+# the object, the same for gsub!/gsub.
+
+# Sugared syntax yields the position of the match (or nil if no
+# match). Note that the object at the right of the operator needs
+# not to be a Regexp object (it can be a String). The "dont
+# match" operator yields true or false.
+meadow =~ /sheep/ # position of the match, nil if no match
+meadow !~ /sheep/ # true if doesn't match, false if it does
+# There is no sugared version for the substitution
+
+meadow =~ /\bovines?\b/i and print "Here be sheep!"
+
+string = "good food"
+string.sub!(/o*/, 'e')
+
+# % echo ababacaca | ruby -ne 'puts $& if /(a|ba|b)+(a|ac)+/'
+# ababa
+
+# The "global" (or "multiple") match is handled by String#scan
+scan (/(\d+)/) {
+ puts "Found number #{$1}"
+}
+
+# String#scan yields an Array if not used with a block
+numbers = scan(/\d+/)
+
+digits = "123456789"
+nonlap = digits.scan(/(\d\d\d)/)
+yeslap = digits.scan(/(?=(\d\d\d))/)
+puts "Non-overlapping: #{nonlap.join(' ')}"
+puts "Overlapping: #{yeslap.join(' ')}";
+# Non-overlapping: 123 456 789
+# Overlapping: 123 234 345 456 567 678 789
+
+string = "And little lambs eat ivy"
+string =~ /l[^s]*s/
+puts "(#$`) (#$&) (#$')"
+# (And ) (little lambs) ( eat ivy)
+
+
+# @@PLEAC@@_6.1
+# Ruby doesn't have the same problem:
+dst = src.sub('this', 'that')
+
+progname = $0.sub('^.*/', '')
+
+bindirs = %w(/usr/bin /bin /usr/local/bin)
+libdirs = bindirs.map { |l| l.sub('bin', 'lib') }
+
+
+# @@PLEAC@@_6.3
+/\S+/ # as many non-whitespace bytes as possible
+/[A-Za-z'-]+/ # as many letters, apostrophes, and hyphens
+
+/\b([A-Za-z]+)\b/ # usually best
+/\s([A-Za-z]+)\s/ # fails at ends or w/ punctuation
+
+
+# @@PLEAC@@_6.4
+require 'socket'
+str = 'www.ruby-lang.org and www.rubygarden.org'
+re = /
+ ( # capture the hostname in $1
+ (?: # these parens for grouping only
+ (?! [-_] ) # lookahead for neither underscore nor dash
+ [\w-] + # hostname component
+ \. # and the domain dot
+ ) + # now repeat that whole thing a bunch of times
+ [A-Za-z] # next must be a letter
+ [\w-] + # now trailing domain part
+ ) # end of $1 capture
+ /x # /x for nice formatting
+
+str.gsub! re do # pass a block to execute replacement
+ host = TCPsocket.gethostbyname($1)
+ "#{$1} [#{host[3]}]"
+end
+
+puts str
+#-----------------------------
+# to match whitespace or #-characters in an extended re you need to escape
+# them.
+
+foo = 42
+str = 'blah #foo# blah'
+str.gsub! %r/ # replace
+ \# # a pound sign
+ (\w+) # the variable name
+ \# # another pound sign
+ /x do
+ eval $1 # with the value of a local variable
+ end
+puts str # => blah 42 blah
+
+
+# @@PLEAC@@_6.5
+# The 'g' modifier doesn't exist in Ruby, a regexp can't be used
+# directly in a while loop; instead, use String#scan { |match| .. }
+fish = 'One fish two fish red fish blue fish'
+WANT = 3
+count = 0
+fish.scan(/(\w+)\s+fish\b/i) {
+ if (count += 1) == WANT
+ puts "The third fish is a #{$1} one."
+ end
+}
+
+if fish =~ /(?:\w+\s+fish\s+){2}(\w+)\s+fish/i
+ puts "The third fish is a #{$1} one."
+end
+
+pond = 'One fish two fish red fish blue fish'
+# String#scan without a block gives an array of matches, each match
+# being an array of all the specified groups
+colors = pond.scan(/(\w+)\s+fish\b/i).flatten # get all matches
+color = colors[2] # then the one we want
+# or without a temporary array
+color = pond.scan(/(\w+)\s+fish\b/i).flatten[2] # just grab element 3
+puts "The third fish in the pond is #{color}."
+
+count = 0
+fishes = 'One fish two fish red fish blue fish'
+evens = fishes.scan(/(\w+)\s+fish\b/i).select { (count+=1) % 2 == 0 }
+print "Even numbered fish are #{evens.join(' ')}."
+
+count = 0
+fishes.gsub(/
+ \b # makes next \w more efficient
+ ( \w+ ) # this is what we\'ll be changing
+ (
+ \s+ fish \b
+ )
+ /x) {
+ if (count += 1) == 4
+ 'sushi' + $2
+ else
+ $1 + $2
+ end
+}
+
+pond = 'One fish two fish red fish blue fish swim here.'
+puts "Last fish is #{pond.scan(/\b(\w+)\s+fish\b/i).flatten[-1]}"
+
+/
+ A # find some pattern A
+ (?! # mustn\'t be able to find
+ .* # something
+ A # and A
+ )
+ $ # through the end of the string
+/x
+
+# The "s" perl modifier is "m" in Ruby (not very nice since there is
+# also an "m" in perl..)
+pond = "One fish two fish red fish blue fish swim here."
+if (pond =~ /
+ \b ( \w+) \s+ fish \b
+ (?! .* \b fish \b )
+ /mix)
+ puts "Last fish is #{$1}."
+else
+ puts "Failed!"
+end
+
+
+# @@PLEAC@@_6.6
+#-----------------------------
+#!/usr/bin/ruby -w
+# killtags - very bad html killer
+$/ = nil; # each read is whole file
+while file = gets() do
+ file.gsub!(/<.*?>/m,''); # strip tags (terribly)
+ puts file # print file to STDOUT
+end
+#-----------------------------
+#!/usr/bin/ruby -w
+#headerfy - change certain chapter headers to html
+$/ = ''
+while file = gets() do
+ pattern = /
+ \A # start of record
+ ( # capture in $1
+ Chapter # text string
+ \s+ # mandatory whitespace
+ \d+ # decimal number
+ \s* # optional whitespace
+ : # a real colon
+ . * # anything not a newline till end of line
+ )
+ /x
+ puts file.gsub(pattern,'<H1>\1</H1>')
+end
+#-----------------------------
+#% ruby -00pe "gsub!(/\A(Chapter\s+\d+\s*:.*)/,'<H1>\1</H1>')" datafile
+
+#!/usr/bin/ruby -w
+#-----------------------------
+for file in ARGV
+ file = File.open(ARGV.shift)
+ while file.gets('') do # each read is a paragraph
+ print "chunk #{$.} in $ARGV has <<#{$1}>>\n" while /^START(.*?)^END/m
+ end # /m activates the multiline mode
+end
+#-----------------------------
+
+# @@PLEAC@@_6.7
+#-----------------------------
+$/ = nil;
+file = File.open("datafile")
+chunks = file.gets.split(/pattern/)
+#-----------------------------
+# .Ch, .Se and .Ss divide chunks of STDIN
+chunks = gets(nil).split(/^\.(Ch|Se|Ss)$/)
+print "I read #{chunks.size} chunks.\n"
+#-----------------------------
+
+
+# @@PLEAC@@_6.8
+while gets
+ if ~/BEGIN/ .. ~/END/
+ # line falls between BEGIN and END inclusive
+ end
+end
+
+while gets
+ if ($. == firstnum) .. ($. == lastnum)
+ # operate between firstnum and lastnum line number
+ end
+end
+
+# in ruby versions prior to 1.8, the above two conditional
+# expressions could be shortened to:
+# if /BEGIN/ .. /END/
+# and
+# if firstnum .. lastnum
+# but these now only work this way from the command line
+
+#-----------------------------
+
+while gets
+ if ~/BEGIN/ ... ~/END/
+ # line falls between BEGIN and END on different lines
+ end
+end
+
+while gets
+ if ($. == first) ... ($. == last)
+ # operate between first and last line number on different lines
+ end
+end
+
+#-----------------------------
+# command-line to print lines 15 through 17 inclusive (see below)
+# ruby -ne 'print if 15 .. 17' datafile
+
+# print out all <XMP> .. </XMP> displays from HTML doc
+while gets
+ print if ~%r#<XMP>#i .. ~%r#</XMP>#i;
+end
+
+# same, but as shell command
+# ruby -ne 'print if %r#<XMP>#i .. %r#</XMP>#i' document.html
+#-----------------------------
+# ruby -ne 'BEGIN { $top=3; $bottom=5 }; \
+# print if $top .. $bottom' /etc/passwd # FAILS
+# ruby -ne 'BEGIN { $top=3; $bottom=5 }; \
+# print if $. == $top .. $. == $bottom' /etc/passwd # works
+# ruby -ne 'print if 3 .. 5' /etc/passwd # also works
+#-----------------------------
+print if ~/begin/ .. ~/end/;
+print if ~/begin/ ... ~/end/;
+#-----------------------------
+while gets
+ $in_header = $. == 1 .. ~/^$/ ? true : false
+ $in_body = ~/^$/ .. ARGF.eof ? true : false
+end
+#-----------------------------
+seen = {}
+ARGF.each do |line|
+ next unless line =~ /^From:?\s/i .. line =~ /^$/;
+ line.scan(%r/([^<>(),;\s]+\@[^<>(),;\s]+)/).each do |addr|
+ puts addr unless seen[addr]
+ seen[addr] ||= 1
+ end
+end
+
+
+# @@PLEAC@@_6.9
+def glob2pat(globstr)
+ patmap = {
+ '*' => '.*',
+ '?' => '.',
+ '[' => '[',
+ ']' => ']',
+ }
+ globstr.gsub!(/(.)/) { |c| patmap[c] || Regexp::escape(c) }
+ '^' + globstr + '$'
+end
+
+
+# @@PLEAC@@_6.10
+# avoid interpolating patterns like this if the pattern
+# isn't going to change:
+pattern = ARGV.shift
+ARGF.each do |line|
+ print line if line =~ /#{pattern}/
+end
+
+# the above creates a new regex each iteration. Instead,
+# use the /o modifier so the regex is compiled only once
+
+pattern = ARGV.shift
+ARGF.each do |line|
+ print line if line =~ /#{pattern}/o
+end
+
+#-----------------------------
+
+#!/usr/bin/ruby
+# popgrep1 - grep for abbreviations of places that say "pop"
+# version 1: slow but obvious way
+popstates = %w(CO ON MI WI MN)
+ARGF.each do |line|
+ popstates.each do |state|
+ if line =~ /\b#{state}\b/
+ print line
+ last
+ end
+ end
+end
+
+#-----------------------------
+#!/usr/bin/ruby
+# popgrep2 - grep for abbreviations of places that say "pop"
+# version 2: eval strings; fast but hard to quote
+popstates = %w(CO ON MI WI MN)
+code = "ARGF.each do |line|\n"
+popstates.each do |state|
+ code += "\tif line =~ /\\b#{state}\\b/; print(line); next; end\n"
+end
+code += "end\n"
+print "CODE IS\n---\n#{code}\n---\n" if false # turn on for debugging
+eval code
+
+# CODE IS
+# ---
+# ARGF.each do |line|
+# if line =~ /\bCO\b/; print(line); next; end
+# if line =~ /\bON\b/; print(line); next; end
+# if line =~ /\bMI\b/; print(line); next; end
+# if line =~ /\bWI\b/; print(line); next; end
+# if line =~ /\bMN\b/; print(line); next; end
+# end
+#
+# ---
+
+## alternatively, the same idea as above but compiling
+## to a case statement: (not in perlcookbook)
+#!/usr/bin/ruby -w
+# popgrep2.5 - grep for abbreviations of places that say "pop"
+# version 2.5: eval strings; fast but hard to quote
+popstates = %w(CO ON MI WI MN)
+code = "ARGF.each do |line|\n case line\n"
+popstates.each do |state|
+ code += " when /\\b#{state}\\b/ : print line\n"
+end
+code += " end\nend\n"
+print "CODE IS\n---\n#{code}\n---\n" if false # turn on for debugging
+eval code
+
+# CODE IS
+# ---
+# ARGF.each do |line|
+# case line
+# when /\bCO\b/ : print line
+# when /\bON\b/ : print line
+# when /\bMI\b/ : print line
+# when /\bWI\b/ : print line
+# when /\bMN\b/ : print line
+# end
+# end
+#
+# ---
+
+# Note: (above) Ruby 1.8+ allows the 'when EXP : EXPR' on one line
+# with the colon separator.
+
+#-----------------------------
+#!/usr/bin/ruby
+# popgrep3 - grep for abbreviations of places that say "pop"
+# version3: build a match_any function
+popstates = %w(CO ON MI WI MN)
+expr = popstates.map{|e|"line =~ /\\b#{e}\\b/"}.join('||')
+eval "def match_any(line); #{expr};end"
+ARGF.each do |line|
+ print line if match_any(line)
+end
+#-----------------------------
+
+## building a match_all function is a trivial
+## substitution of && for ||
+## here is a generalized example:
+#!/usr/bin/ruby -w
+## grepauth - print lines that mention both foo and bar
+class MultiMatch
+ def initialize(*patterns)
+ _any = build_match('||',patterns)
+ _all = build_match('&&',patterns)
+ eval "def match_any(line);#{_any};end\n"
+ eval "def match_all(line);#{_all};end\n"
+ end
+ def build_match(sym,args)
+ args.map{|e|"line =~ /#{e}/"}.join(sym)
+ end
+end
+
+mm = MultiMatch.new('foo','bar')
+ARGF.each do |line|
+ print line if mm.match_all(line)
+end
+#-----------------------------
+
+#!/usr/bin/ruby
+# popgrep4 - grep for abbreviations of places that say "pop"
+# version4: pretty fast, but simple: compile all re's first:
+popstates = %w(CO ON MI WI MN)
+popstates = popstates.map{|re| %r/\b#{re}\b/}
+ARGF.each do |line|
+ popstates.each do |state_re|
+ if line =~ state_re
+ print line
+ break
+ end
+ end
+end
+
+## speeds trials on the jargon file(412): 26006 lines, 1.3MB
+## popgrep1 => 7.040s
+## popgrep2 => 0.656s
+## popgrep2.5 => 0.633s
+## popgrep3 => 0.675s
+## popgrep4 => 1.027s
+
+# unless speed is critical, the technique in popgrep4 is a
+# reasonable balance between speed and logical simplicity.
+
+
+# @@PLEAC@@_6.11
+begin
+ print "Pattern? "
+ pat = $stdin.gets.chomp
+ Regexp.new(pat)
+rescue
+ warn "Invalid Pattern"
+ retry
+end
+
+
+# @@PLEAC@@_6.13
+# uses the 'amatch' extension found on:
+# http://raa.ruby-lang.org/project/amatch/
+require 'amatch'
+matcher = Amatch.new('balast')
+#$relative, $distance = 0, 1
+File.open('/usr/share/dict/words').each_line do |line|
+ print line if matcher.search(line) <= 1
+end
+__END__
+#CODE
+ballast
+ballasts
+balustrade
+balustrades
+blast
+blasted
+blaster
+blasters
+blasting
+blasts
+
+
+# @@PLEAC@@_6.14
+str.scan(/\G(\d)/).each do |token|
+ puts "found #{token}"
+end
+#-----------------------------
+n = " 49 here"
+n.gsub!(/\G /,'0')
+puts n
+#-----------------------------
+str = "3,4,5,9,120"
+str.scan(/\G,?(\d+)/).each do |num|
+ puts "Found number: #{num}"
+end
+#-----------------------------
+# Ruby doesn't have the String.pos or a /c re modifier like Perl
+# But it does have StringScanner in the standard library (strscan)
+# which allows similar functionality:
+
+require 'strscan'
+text = 'the year 1752 lost 10 days on the 3rd of September'
+sc = StringScanner.new(text)
+while sc.scan(/.*?(\d+)/)
+ print "found: #{sc[1]}\n"
+end
+if sc.scan(/\S+/)
+ puts "Found #{sc[0]} after last number"
+end
+#-----------------------------
+# assuming continuing from above:
+puts "The position in 'text' is: #{sc.pos}"
+sc.pos = 30
+puts "The position in 'text' is: #{sc.pos}"
+
+
+# @@PLEAC@@_6.15
+#-----------------------------
+# greedy pattern
+str.gsub!(/<.*>/m,'') # not good
+
+# non-greedy (minimal) pattern
+str.gsub!(/<.*?>/m,'') # not great
+
+
+#-----------------------------
+#<b><i>this</i> and <i>that</i> are important</b> Oh, <b><i>me too!</i></b>
+#-----------------------------
+%r{ <b><i>(.*?)</i></b> }mx
+#-----------------------------
+%r/BEGIN((?:(?!BEGIN).)*)END/
+#-----------------------------
+%r{ <b><i>( (?: (?!</b>|</i>). )* ) </i></b> }mx
+#-----------------------------
+%r{ <b><i>( (?: (?!</[ib]>). )* ) </i></b> }mx
+#-----------------------------
+%r{
+ <b><i>
+ [^<]* # stuff not possibly bad, and not possibly the end.
+ (?:
+ # at this point, we can have '<' if not part of something bad
+ (?! </?[ib]> ) # what we can't have
+ < # okay, so match the '<'
+ [^<]* # and continue with more safe stuff
+ ) *
+ </i></b>
+ }mx
+
+
+# @@PLEAC@@_6.16
+#-----------------------------
+$/ = ""
+ARGF.each do |para|
+ para.scan %r/
+ \b # start at word boundary
+ (\S+) # find chunk of non-whitespace
+ \b # until a word boundary
+ (
+ \s+ # followed by whitespace
+ \1 # and that same chunk again
+ \b # and a word boundary
+ ) + # one or more times
+ /xi do
+ puts "dup word '#{$1}' at paragraph #{$.}"
+ end
+end
+#-----------------------------
+astr = 'nobody'
+bstr = 'bodysnatcher'
+if "#{astr} #{bstr}" =~ /^(\w+)(\w+) \2(\w+)$/
+ print "#{$2} overlaps in #{$1}-#{$2}-#{$3}"
+end
+#-----------------------------
+#!/usr/bin/ruby -w
+# prime_pattern -- find prime factors of argument using patterns
+ARGV << 180
+cap = 'o' * ARGV.shift
+while cap =~ /^(oo+?)\1+$/
+ print $1.size, " "
+ cap.gsub!(/#{$1}/,'o')
+end
+puts cap.size
+#-----------------------------
+#diophantine
+# solve for 12x + 15y + 16z = 281, maximizing x
+if ('o' * 281).match(/^(o*)\1{11}(o*)\2{14}(o*)\3{15}$/)
+ x, y, z = $1.size, $2.size, $3.size
+ puts "One solution is: x=#{x}; y=#{y}; z=#{z}"
+else
+ puts "No solution."
+end
+# => One solution is: x=17; y=3; z=2
+
+#-----------------------------
+# using different quantifiers:
+('o' * 281).match(/^(o+)\1{11}(o+)\2{14}(o+)\3{15}$/)
+# => One solution is: x=17; y=3; z=2
+
+('o' * 281).match(/^(o*?)\1{11}(o*)\2{14}(o*)\3{15}$/)
+# => One solution is: x=0; y=7; z=11
+
+('o' * 281).match(/^(o+?)\1{11}(o*)\2{14}(o*)\3{15}$/)
+# => One solution is: x=1; y=3; z=14
+
+
+# @@PLEAC@@_6.17
+# alpha OR beta
+%r/alpha|beta/
+
+# alpha AND beta
+%r/(?=.*alpha)(?=.*beta)/m
+
+# alpha AND beta, no overlap
+%r/alpha.*beta|beta.*alpha/m
+
+# NOT beta
+%r/^(?:(?!beta).)*$/m
+
+# NOT bad BUT good
+%r/(?=(?:(?!BAD).)*$)GOOD/m
+#-----------------------------
+
+if !(string =~ /pattern/) # ugly
+ something()
+end
+
+if string !~ /pattern/ # preferred
+ something()
+end
+
+
+#-----------------------------
+if string =~ /pat1/ && string =~ /pat2/
+ something()
+end
+#-----------------------------
+if string =~ /pat1/ || string =~ /pat2/
+ something()
+end
+#-----------------------------
+#!/usr/bin/ruby -w
+# minigrep - trivial grep
+pat = ARGV.shift
+ARGF.each do |line|
+ print line if line =~ /#{pat}/o
+end
+#-----------------------------
+ "labelled" =~ /^(?=.*bell)(?=.*lab)/m
+#-----------------------------
+$string =~ /bell/ && $string =~ /lab/
+#-----------------------------
+$murray_hill = "blah bell blah "
+if $murray_hill =~ %r{
+ ^ # start of string
+ (?= # zero-width lookahead
+ .* # any amount of intervening stuff
+ bell # the desired bell string
+ ) # rewind, since we were only looking
+ (?= # and do the same thing
+ .* # any amount of intervening stuff
+ lab # and the lab part
+ )
+ }mx # /m means . can match newline
+
+ print "Looks like Bell Labs might be in Murray Hill!\n";
+end
+#-----------------------------
+"labelled" =~ /(?:^.*bell.*lab)|(?:^.*lab.*bell)/
+#-----------------------------
+$brand = "labelled";
+if $brand =~ %r{
+ (?: # non-capturing grouper
+ ^ .*? # any amount of stuff at the front
+ bell # look for a bell
+ .*? # followed by any amount of anything
+ lab # look for a lab
+ ) # end grouper
+ | # otherwise, try the other direction
+ (?: # non-capturing grouper
+ ^ .*? # any amount of stuff at the front
+ lab # look for a lab
+ .*? # followed by any amount of anything
+ bell # followed by a bell
+ ) # end grouper
+ }mx # /m means . can match newline
+ print "Our brand has bell and lab separate.\n";
+end
+#-----------------------------
+$map =~ /^(?:(?!waldo).)*$/s
+#-----------------------------
+$map = "the great baldo"
+if $map =~ %r{
+ ^ # start of string
+ (?: # non-capturing grouper
+ (?! # look ahead negation
+ waldo # is he ahead of us now?
+ ) # is so, the negation failed
+ . # any character (cuzza /s)
+ ) * # repeat that grouping 0 or more
+ $ # through the end of the string
+ }mx # /m means . can match newline
+ print "There's no waldo here!\n";
+end
+=begin
+ 7:15am up 206 days, 13:30, 4 users, load average: 1.04, 1.07, 1.04
+
+USER TTY FROM LOGIN@ IDLE JCPU PCPU WHAT
+
+tchrist tty1 5:16pm 36days 24:43 0.03s xinit
+
+tchrist tty2 5:19pm 6days 0.43s 0.43s -tcsh
+
+tchrist ttyp0 chthon 7:58am 3days 23.44s 0.44s -tcsh
+
+gnat ttyS4 coprolith 2:01pm 13:36m 0.30s 0.30s -tcsh
+=end
+#% w | minigrep '^(?!.*ttyp).*tchrist'
+#-----------------------------
+%r{
+ ^ # anchored to the start
+ (?! # zero-width look-ahead assertion
+ .* # any amount of anything (faster than .*?)
+ ttyp # the string you don't want to find
+ ) # end look-ahead negation; rewind to start
+ .* # any amount of anything (faster than .*?)
+ tchrist # now try to find Tom
+}x
+#-----------------------------
+#% w | grep tchrist | grep -v ttyp
+#-----------------------------
+#% grep -i 'pattern' files
+#% minigrep '(?i)pattern' files
+#-----------------------------
+
+
+# @@PLEAC@@_6.20
+ans = $stdin.gets.chomp
+re = %r/^#{Regexp.quote(ans)}/
+case
+ when "SEND" =~ re : puts "Action is send"
+ when "STOP" =~ re : puts "Action is stop"
+ when "ABORT" =~ re : puts "Action is abort"
+ when "EDIT" =~ re : puts "Action is edit"
+end
+#-----------------------------
+require 'abbrev'
+table = Abbrev.abbrev %w-send stop abort edit-
+loop do
+ print "Action: "
+ ans = $stdin.gets.chomp
+ puts "Action for #{ans} is #{table[ans.downcase]}"
+end
+
+
+#-----------------------------
+# dummy values are defined for 'file', 'PAGER', and
+# the 'invoke_editor' and 'deliver_message' methods
+# do not do anything interesting in this example.
+#!/usr/bin/ruby -w
+require 'abbrev'
+
+file = 'pleac_ruby.data'
+PAGER = 'less'
+
+def invoke_editor
+ puts "invoking editor"
+end
+
+def deliver_message
+ puts "delivering message"
+end
+
+actions = {
+ 'edit' => self.method(:invoke_editor),
+ 'send' => self.method(:deliver_message),
+ 'list' => proc {system(PAGER, file)},
+ 'abort' => proc {puts "See ya!"; exit},
+ "" => proc {puts "Unknown Command"}
+}
+
+dtable = Abbrev.abbrev(actions.keys)
+loop do
+ print "Action: "
+ ans = $stdin.gets.chomp.delete(" \t")
+ actions[ dtable[ans.downcase] || "" ].call
+end
+
+
+# @@PLEAC@@_6.19
+#-----------------------------
+# basically, the Perl Cookbook categorizes this as an
+# unsolvable problem ...
+#-----------------------------
+1 while addr.gsub!(/\([^()]*\)/,'')
+#-----------------------------
+# Dear someuser@host.com,
+#
+# Please confirm the mail address you gave us Wed May 6 09:38:41
+# MDT 1998 by replying to this message. Include the string
+# "Rumpelstiltskin" in that reply, but spelled in reverse; that is,
+# start with "Nik...". Once this is done, your confirmed address will
+# be entered into our records.
+
+
+# @@PLEAC@@_6.21
+#-----------------------------
+#% gunzip -c ~/mail/archive.gz | urlify > archive.urlified
+#-----------------------------
+#% urlify ~/mail/*.inbox > ~/allmail.urlified
+#-----------------------------
+#!/usr/bin/ruby -w
+# urlify - wrap HTML links around URL-like constructs
+
+urls = '(https?|telnet|gopher|file|wais|ftp)';
+ltrs = '\w';
+gunk = '/#~:.?+=&%@!\-';
+punc = '.:?\-';
+any = "#{ltrs}#{gunk}#{punc}";
+
+ARGF.each do |line|
+ line.gsub! %r/
+ \b # start at word boundary
+ ( # begin $1 {
+ #{urls} : # need resource and a colon
+ [#{any}] +? # followed by on or more
+ # of any valid character, but
+ # be conservative and take only
+ # what you need to....
+ ) # end $1 }
+ (?= # look-ahead non-consumptive assertion
+ [#{punc}]* # either 0 or more punctuation
+ [^#{any}] # followed by a non-url char
+ | # or else
+ $ # then end of the string
+ )
+ /iox do
+ %Q|<A HREF="#{$1}">#{$1}</A>|
+ end
+ print line
+end
+
+
+# @@PLEAC@@_6.23
+%r/^m*(d?c{0,3}|c[dm])(l?x{0,3}|x[lc])(v?i{0,3}|i[vx])$/i
+#-----------------------------
+str.sub!(/(\S+)(\s+)(\S+)/, '\3\2\1')
+#-----------------------------
+%r/(\w+)\s*=\s*(.*)\s*$/ # keyword is $1, value is $2
+#-----------------------------
+%r/.{80,}/
+#-----------------------------
+%r|(\d+)/(\d+)/(\d+) (\d+):(\d+):(\d+)|
+#-----------------------------
+str.gsub!(%r|/usr/bin|,'/usr/local/bin')
+#-----------------------------
+str.gsub!(/%([0-9A-Fa-f][0-9A-Fa-f])/){ $1.hex.chr }
+#-----------------------------
+str.gsub!(%r{
+ /\* # Match the opening delimiter
+ .*? # Match a minimal number of characters
+ \*/ # Match the closing delimiter
+}xm,'')
+#-----------------------------
+str.sub!(/^\s+/, '')
+str.sub!(/\s+$/, '')
+
+# but really, in Ruby we'd just do:
+str.strip!
+#-----------------------------
+str.gsub!(/\\n/,"\n")
+#-----------------------------
+str.sub!(/^.*::/, '')
+#-----------------------------
+%r/^([01]?\d\d|2[0-4]\d|25[0-5])\.([01]?\d\d|2[0-4]\d|25[0-5])\.
+ ([01]?\d\d|2[0-4]\d|25[0-5])\.([01]?\d\d|2[0-4]\d|25[0-5])$/x
+#-----------------------------
+str.sub!(%r|^.*/|, '')
+#-----------------------------
+cols = ( (ENV['TERMCAP'] || " ") =~ /:co#(\d+):/ ) ? $1 : 80;
+#-----------------------------
+name = " #{$0} #{ARGV}".gsub(%r| /\S+/|, ' ')
+#-----------------------------
+require 'rbconfig'
+include Config
+raise "This isn't Linux" unless CONFIG['target_os'] =~ /linux/i;
+#-----------------------------
+str.gsub!(%r/\n\s+/, ' ')
+#-----------------------------
+nums = str.scan(/(\d+\.?\d*|\.\d+)/)
+#-----------------------------
+capwords = str.scan(%r/(\b[^\Wa-z0-9_]+\b)/)
+#-----------------------------
+lowords = str.scan(%r/(\b[^\WA-Z0-9_]+\b)/)
+#-----------------------------
+icwords = str.scan(%r/(\b[^\Wa-z0-9_][^\WA-Z0-9_]*\b)/)
+#-----------------------------
+links = str.scan(%r/<A[^>]+?HREF\s*=\s*["']?([^'" >]+?)[ '"]?>/mi)
+#-----------------------------
+initial = str =~ /^\S+\s+(\S)\S*\s+\S/ ? $1 : ""
+#-----------------------------
+str.gsub!(%r/"([^"]*)"/, %q-``\1''-)
+#-----------------------------
+
+$/ = ""
+sentences = []
+ARGF.each do |para|
+ para.gsub!(/\n/, ' ')
+ para.gsub!(/ {3,}/,' ')
+ sentences << para.scan(/(\S.*?[!?.])(?= |\Z)/)
+end
+
+#-----------------------------
+%r/(\d{4})-(\d\d)-(\d\d)/ # YYYY in $1, MM in $2, DD in $3
+#-----------------------------
+%r/ ^
+ (?:
+ 1 \s (?: \d\d\d \s)? # 1, or 1 and area code
+ | # ... or ...
+ \(\d\d\d\) \s # area code with parens
+ | # ... or ...
+ (?: \+\d\d?\d? \s)? # optional +country code
+ \d\d\d ([\s\-]) # and area code
+ )
+ \d\d\d (\s|\1) # prefix (and area code separator)
+ \d\d\d\d # exchange
+ $
+ /x
+#-----------------------------
+%r/\boh\s+my\s+gh?o(d(dess(es)?|s?)|odness|sh)\b/i
+#-----------------------------
+lines = []
+lines << $1 while input.sub!(/^([^\012\015]*)(\012\015?|\015\012?)/,'')
+
+
+# @@PLEAC@@_7.0
+# An IO object being Enumerable, we can use 'each' directly on it
+File.open("/usr/local/widgets/data").each { |line|
+ puts line if line =~ /blue/
+}
+
+logfile = File.new("/var/log/rubylog.txt", "w")
+mysub($stdin, logfile)
+
+# The method IO#readline is similar to IO#gets
+# but throws an exception when it reaches EOF
+f = File.new("bla.txt")
+begin
+ while (line = f.readline)
+ line.chomp
+ $stdout.print line if line =~ /blue/
+ end
+rescue EOFError
+ f.close
+end
+
+while $stdin.gets # reads from STDIN
+ unless (/\d/)
+ $stderr.puts "No digit found." # writes to STDERR
+ end
+ puts "Read: #{$_}" # writes to STDOUT
+end
+
+logfile = File.new("/tmp/log", "w")
+
+logfile.close
+
+# $defout (or its synonym '$>') is the destination of output
+# for Kernel#print, Kernel#puts, and family functions
+logfile = File.new("log.txt", "w")
+old = $defout
+$defout = logfile # switch to logfile for output
+puts "Countdown initiated ..."
+$defout = old # return to original output
+puts "You have 30 seconds to reach minimum safety distance."
+
+
+# @@PLEAC@@_7.1
+source = File.new(path, "r") # open file "path" for reading only
+sink = File.new(path, "w") # open file "path" for writing only
+
+source = File.open(path, File::RDONLY) # open file "path" for reading only
+sink = File.open(path, File::WRONLY) # open file "path" for writing only
+
+file = File.open(path, "r+") # open "path" for reading and writing
+file = File.open(path, flags) # open "path" with the flags "flags" (see examples below for flags)
+
+# open file "path" read only
+file = File.open(path, "r")
+file = File.open(path, File::RDONLY)
+
+# open file "path" write only, create it if it does not exist
+# truncate it to zero length if it exists
+file = File.open(path, "w")
+file = File.open(path, File::WRONLY|File::TRUNC|File::CREAT)
+file = File.open(path, File::WRONLY|File::TRUNC|File::CREAT, 0666) # with permission 0666
+
+# open file "path" write only, fails if file exists
+file = File.open(path, File::WRONLY|File::EXCL|File::CREAT)
+file = File.open(path, File::WRONLY|File::EXCL|File::CREAT, 0666)
+
+# open file "path" for appending
+file = File.open(path, "a")
+file = File.open(path, File::WRONLY|File::APPEND|File::CREAT)
+file = File.open(path, File::WRONLY|File::APPEND|File::CREAT, 0666)
+
+# open file "path" for appending only when file exists
+file = File.open(path, File::WRONLY|File::APPEND)
+
+# open file "path" for reading and writing
+file = File.open(path, "r+")
+file = File.open(path, File::RDWR)
+
+# open file for reading and writing, create a new file if it does not exist
+file = File.open(path, File::RDWR|File::CREAT)
+file = File.open(path, File::RDWR|File::CREAT, 0600)
+
+# open file "path" reading and writing, fails if file exists
+file = File.open(path, File::RDWR|File::EXCL|File::CREAT)
+file = File.open(path, File::RDWR|File::EXCL|File::CREAT, 0600)
+
+
+# @@PLEAC@@_7.2
+# No problem with Ruby since the filename doesn't contain characters with
+# special meaning; like Perl's sysopen
+File.open(filename, 'r')
+
+
+# @@PLEAC@@_7.3
+File.expand_path('~root/tmp')
+#=> "/root/tmp"
+File.expand_path('~rpcuser')
+#=> "/var/lib/nfs"
+
+# To expand ~/.. it explicitely needs the environment variable HOME
+File.expand_path('~/tmp')
+#=> "/home/gc/tmp"
+
+
+# @@PLEAC@@_7.4
+# The exception raised in Ruby reports the filename
+File.open('afile')
+
+
+# @@PLEAC@@_7.5
+# Standard Ruby distribution provides the following useful extension
+require 'tempfile'
+# With the Tempfile class, the file is automatically deleted on garbage
+# collection, so you won't need to remove it, later on.
+tf = Tempfile.new('tmp') # a name is required to create the filename
+
+# If you need to pass the filename to an external program you can use
+# File#path, but don't forget to File#flush in order to flush anything
+# living in some buffer somewhere.
+tf.flush
+system("/usr/bin/dowhatever #{tf.path}")
+
+fh = Tempfile.new('tmp')
+fh.sync = true # autoflushes
+10.times { |i| fh.puts i }
+fh.rewind
+puts 'Tmp file has: ', fh.readlines
+
+
+# @@PLEAC@@_7.6
+while (DATA.gets) do
+ # process the line
+end
+__END__
+# your data goes here
+# __DATA__ doesn't exist in Ruby
+
+#CODE
+# get info about the script (size, date of last modification)
+kilosize = DATA.stat.size / 1024
+last_modif = DATA.stat.mtime
+puts "<P>Script size is #{kilosize}"
+puts "<P>Last script update: #{last_modif}"
+__END__
+# DO NOT REMOVE THE PRECEEDING LINE.
+# Everything else in this file will be ignored.
+#CODE
+
+
+# @@PLEAC@@_7.7
+while line = gets do
+ # do something with line.
+end
+
+# or
+while gets do
+ # do something with $_
+end
+
+# or more rubyish
+$stdun.each do |line|
+ # do stuff with line
+end
+
+
+# ARGF may makes this more easy
+# this is skipped if ARGV.size==0
+ARGV.each do |filename|
+ # closing and exception handling are done by the block
+ open(filename) do |fd|
+ fd.each do |line|
+ # do stuff with line
+ end
+ end rescue abort("can't open %s" % filename)
+end
+
+# globbing is done in the Dir module
+# ARGV is a constant: replace its contents in place instead of reassigning
+# it, which would trigger an "already initialized constant" warning
+ARGV.replace(Dir["*.[Cch]"]) if ARGV.empty?
+
+# note: optparse is the preferred way to handle this
+if (ARGV[0] == '-c')
+ chop_first += 1
+ ARGV.shift
+end
+
+
+# processing numerical options
+if ARGV[0] =~ /^-(\d+)$/
+ columns = $1
+ ARGV.shift
+end
+
+# again, better to use optparse:
+require 'optparse'
+nostdout = 0
+append = 0
+unbuffer = 0
+ignore_ints = 0
+ARGV.options do |opt|
+ opt.on('-n') { nostdout +=1 }
+ opt.on('-a') { append +=1 }
+ opt.on('-u') { unbuffer +=1 }
+ opt.on('-i') { ignore_ints +=1 }
+ opt.parse!
+end or abort("usage: " + __FILE__ + " [-ainu] [filenames]")
+
+# no need to do undef $/, we have File.read
+str = File.read(ARGV[0])
+
+# again we have File.read
+str = File.read(ARGV[0])
+
+# not sure what this should do:
+# I believe open the file, print filename, lineno and line:
+ARGF.each_with_index do |line, idx|
+ print ARGF.filename, ":", idx, ";", line
+end
+
+# print all the lines in every file passed via command line that contains login
+ARGF.each do |line|
+ puts line if line =~ /login/
+end
+#
+# even this would fit
+#%ruby -ne "print if /f/" 2.log
+#
+
+ARGF.each { |l| puts l.downcase! }
+
+#------------------
+#!/usr/bin/ruby -p
+# just like perl's -p
+$_.downcase!
+#
+
+# I don't know who should I trust.
+# perl's version splits on \w+ while python's on \w.
+
+chunks = 0
+
+File.read(ARGV[0]).split.each do |word|
+ next if word =~ /^#/
+ break if ["__DATA__", "__END__"].member? word
+ chunks += 1
+end
+
+print "Found ", chunks, " chunks\n"
+
+
+# @@PLEAC@@_7.8
+old = File.open(old_file)
+new = File.open(new_file, "w")
+while old.gets do
+ # change $_, then...
+ new.print $_
+end
+old.close
+new.close
+File.rename(old_file, "old.orig")
+File.rename(new_file, old_file)
+
+while old.gets do
+ if $. == 20 then # we are at the 20th line
+ new.puts "Extra line 1"
+ new.puts "Extra line 2"
+ end
+ new.print $_
+end
+
+while old.gets do
+ next if 20..30 # skip the 20th line to the 30th
+ # Ruby (and Perl) permit to write if 20..30
+ # instead of if (20 <= $.) and ($. <= 30)
+ new.print $_
+end
+
+
+# @@PLEAC@@_7.9
+#% ruby -i.orig -pe 'FILTER COMMAND' file1 file2 file3 ...
+#
+#-----------------------------
+##!/usr/bin/ruby -i.orig -p
+# filter commands go here
+#-----------------------------
+
+#% ruby -pi.orig -e 'gsub!(/DATE/){Time.now)'
+
+# effectively becomes:
+ARGV << 'I'
+oldfile = ""
+while gets
+ if ARGF.filename != oldfile
+ newfile = ARGF.filename
+ File.rename(newfile, newfile + ".orig")
+ $stdout = File.open(newfile,'w')
+ oldfile = newfile
+ end
+ gsub!(/DATE/){Time.now}
+ print
+end
+$stdout = STDOUT
+#-----------------------------
+#% ruby -i.old -pe 'gsub!(%r{\bhisvar\b}, 'hervar')' *.[Cchy]
+
+#-----------------------------
+# set up to iterate over the *.c files in the current directory,
+# editing in place and saving the old file with a .orig extension
+$-i = '.orig' # set up -i mode
+ARGV.replace(Dir['*.[Cchy]'])
+while gets
+ if $. == 1
+ print "This line should appear at the top of each file\n"
+ end
+ gsub!(/\b(p)earl\b/i, '\1erl') # Correct typos, preserving case
+ print
+ ARGF.close if ARGF.eof
+end
+
+
+# @@PLEAC@@_7.10
+File.open('itest', 'r+') do |f| # open file for update
+ lines = f.readlines # read into array of lines
+ lines.each do |it| # modify lines
+ it.gsub!(/foo/, 'QQQ')
+ end
+ f.pos = 0 # back to start
+ f.print lines # write out modified lines
+ f.truncate(f.pos) # truncate to new length
+end # file is automatically closed
+#-----------------------------
+File.open('itest', 'r+') do |f|
+ out = ""
+ f.each do |line|
+ out << line.gsub(/DATE/) {Time.now}
+ end
+ f.pos = 0
+ f.print out
+ f.truncate(f.pos)
+end
+
+# @@PLEAC@@_7.11
+File.open('infile', 'r+') do |f|
+ f.flock File::LOCK_EX
+ # update file
+end
+#-----------------------------
+File::LOCK_SH # shared lock (for reading)
+File::LOCK_EX # exclusive lock (for writing)
+File::LOCK_NB # non-blocking request
+File::LOCK_UN # free lock
+#-----------------------------
+unless f.flock File::LOCK_EX | File::LOCK_NB
+ warn "can't get immediate lock: blocking ..."
+ f.flock File::LOCK_EX
+end
+#-----------------------------
+File.open('numfile', File::RDWR|File::CREAT) do |f|
+ f.flock(File::LOCK_EX)
+ num = f.gets.to_i || 0
+ f.pos = 0
+ f.truncate 0
+ f.puts num + 1q
+end
+
+
+# @@PLEAC@@_7.12
+output_handle.sync = true
+# Please note that like in Perl, $stderr is already unbuffered
+#-----------------------------
+#!/usr/bin/ruby -w
+# seeme - demo stdio output buffering
+$stdout.sync = ARGV.size > 0
+print "Now you don't see it..."
+sleep 2
+puts "now you do"
+#-----------------------------
+$stderr.sync = true
+afile.sync = false
+#-----------------------------
+# assume 'remote_con' is an interactive socket handle,
+# but 'disk_file' is a handle to a regular file.
+remote_con.sync = true # unbuffer for clarity
+disk_file.sync = false # buffered for speed
+#-----------------------------
+require 'socket'
+sock = TCPSocket.new('www.ruby-lang.org', 80)
+sock.sync = true
+sock.puts "GET /en/ HTTP/1.0 \n\n"
+resp = sock.read
+print "DOC IS: #{resp}\n"
+
+
+# @@PLEAC@@_7.13
+#-----------------------------
+# assumes fh1, fh2, fh2 are oen IO objects
+nfound = select([$stdin, fh1, fh2, fh3], nil, nil, 0)
+nfound[0].each do |file|
+ case file
+ when fh1
+ # do something with fh1
+ when fh2
+ # do something with fh2
+ when fh3
+ # do something with fh3
+ end
+end
+#-----------------------------
+input_files = []
+# repeat next line for all in-files to poll
+input_files << fh1
+if nfound = select(input_files, nil, nil, 0)
+ # input ready on files in nfound[0]
+end
+
+
+# @@PLEAC@@_8.0
+#-----------------------------
+# datafile is a file or IO object
+datafile.readlines.each { |line|
+ line.chomp!
+ size = line.length
+ puts size
+}
+#-----------------------------
+datafile.readlines.each { |line|
+ puts line.chomp!.length
+}
+#-----------------------------
+lines = datafile.readlines
+#-----------------------------
+whole_file = file.read
+#-----------------------------
+# ruby -040 -e 'word = gets; puts "First word is #{word}"'
+#-----------------------------
+# ruby -ne 'BEGIN { $/="%%\n" }; $_.chomp; puts $_ if( $_=~/Unix/i)' fortune.dat
+#-----------------------------
+handle.print "one", "two", "three" # "onetwothree"
+puts "Baa baa black sheep." # sent to $stdout
+#-----------------------------
+buffer = handle.read(4096)
+rv = buffer.length
+#-----------------------------
+handle.truncate(length)
+open("/tmp#{$$}.pid", 'w') { |handle| handle.truncate(length) }
+#-----------------------------
+pos = datafile.pos # tell is an alias of pos
+puts "I'm #{pos} bytes from the start of datafile"
+#-----------------------------
+logfile.seek(0, IO::SEEK_END)
+datafile.seek(pos) # IO::SEEK_SET is the default
+out.seek(-20, IO::SEEK_CUR)
+#-----------------------------
+written = datafile.syswrite(mystring)
+raise RunTimeError unless written == mystring.length
+block = infile.sysread(256) # no equivalent to perl offset parameter in sysread
+puts "only read #{block.length} bytes" if 256 != block.length
+#-----------------------------
+pos = handle.sysseek(0, IO::SEEK_CUR) # don't change position
+
+
+# @@PLEAC@@_8.1
+while (line = fh.gets)
+ line.chomp!
+ nextline = nil
+ line.gsub!(/\\$/) { |match| nextline = fh.gets; '' }
+ if (nextline != nil)
+ line += nextline
+ redo
+ end
+ # process full record in line here
+end
+#-----------------------------
+# DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) \
+# $(TEXINFOS) $(INFOS) $(MANS) $(DATA)
+# DEP_DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) \
+# $(TEXINFOS) $(INFO_DEPS) $(MANS) $(DATA) \
+# $(EXTRA_DIST)
+#-----------------------------
+line.gsub!(/\\\s*$/, '') {
+ # as before
+}
+
+
+# @@PLEAC@@_8.2
+#-----------------------------
+count = `wc -l < #{filename}`
+fail "wc failed: #{$?}" if $? != 0
+count.chomp!
+#-----------------------------
+count = 0
+File.open(file, 'r') { |fh|
+ count += 1 while fh.gets
+}
+# count now holds the number of lines read
+#-----------------------------
+count = 0
+while (chunk = file.sysread(2**16))
+ count += chunk.count("\n")
+end rescue EOFError
+#-----------------------------
+File.open(filename,'r') { |fh|
+ count += 1 while fh.gets
+}
+# count now holds the number of lines read
+#-----------------------------
+# As ruby doesn't quite have an equivalent to using a for
+# statement as in perl, I threw this in
+count = File.readlines(filename).size
+#-----------------------------
+1 while file.gets
+count = $.
+#-----------------------------
+$/ = ''
+open(filename, 'r') { |fh|
+ 1 while fh.gets
+ para_count = $.
+} rescue fail("can't open #{filename}: $!")
+#-----------------------------
+
+
+# ^^PLEAC^^_8.3
+#-----------------------------
+while (gets)
+ split.each { |chunk|
+ # do something with chunk
+ }
+end
+#-----------------------------
+while (gets)
+ gsub(/(\w[\w'-]*)/) { |word|
+ # do something with word
+ }
+end
+#-----------------------------
+# Make a word frequency count
+# normally hashes can be created using {} or just Hash.new
+# but we want the default value of an entry to be 0 instead
+# of nil. (nil can't be incremented)
+seen = Hash.new(0)
+while (gets)
+ gsub(/(\w[\w'-]*)/) { |word|
+ seen[word.downcase] += 1
+ }
+end
+# output hash in a descending numeric sort of its values
+seen.sort { |a,b| b[1] <=> a[1] }.each do |k,v|
+ printf("%5d %s\n", v, k )
+end
+
+#-----------------------------
+# Line frequency count
+seen = Hash.new(0)
+while (gets)
+ seen[$_.downcase] += 1
+end
+seen.sort { |a,b| b[1] <=> a[1] }.each do |k,v|
+ printf("%5d %s\n", v, k )
+end
+#-----------------------------
+
+
+# @@PLEAC@@_8.4
+#-----------------------------
+# instead of file handle FILE, we can just
+# use a string containing the filename
+File.readlines(file).each { |line|
+ # do something with line
+}
+#-----------------------------
+File.readlines(file).reverse_each { |line|
+ # do something with line
+}
+#-----------------------------
+# the variable lines might have been created
+# this way
+# lines = File.readlines(file)
+#
+# normally one would use the reverse_each, but
+# if you insist on using a numerical index to
+# iterate over the lines array...
+(lines.size - 1).downto(0) { |i|
+ line = lines[i]
+}
+#-----------------------------
+# the second readlines argument is a the
+# record separator $/, just like perl, a blank
+# separator splits the records into paragraphs
+File.readlines(file, '').each { |paragraph|
+ # do something with paragraph
+ puts "->Paragraph #{paragraph}"
+}
+#-----------------------------
+
+
+# @@PLEAC@@_8.6
+
+$/ = "%\n";
+srand;
+
+File.open('/usr/share/fortune/humorists').each do |line|
+ adage = line if rand($.) < 1
+end
+
+puts adage;
+
+
+# @@PLEAC@@_8.10
+begin
+ fh = File.open(file, "r+")
+ addr = fh.tell unless fh.eof while fh.gets
+ fh.truncate(addr)
+rescue SystemCallError
+ $stderr.puts "#$!"
+end
+
+
+# @@PLEAC@@_9.0
+entry = File.stat("/usr/bin/vi")
+entry = File.stat("/usr/bin")
+entry = File.stat(INFILE)
+
+entry = File.stat("/usr/bin/vi")
+ctime = entry.ctime
+size = entry.size
+
+f = File.open(filename, "r")
+
+## There is no -T equivalent in Ruby, but we can still test emptiness
+if test(?s, filename)
+ puts "#{filename} doesn't have text in it."
+ exit
+end
+
+Dir.new("/usr/bin").each do |filename|
+ puts "Inside /usr/bin is something called #{filename}"
+end
+
+
+# @@PLEAC@@_9.1
+file = File.stat("filename")
+readtime, writetime = file.atime, file.mtime
+file.utime(readtime, writetime)
+
+SECONDS_PER_DAY = 60 * 60 * 24
+file = File.stat("filename")
+atime, mtime = file.atime, file.mtime
+
+atime -= 7 * SECONDS_PER_DAY
+mtime -= 7 * SECONDS_PER_DAY
+
+File.utime(atime, mtime, file)
+mtime = File.stat(file).mtime
+File.utime(Time.new, mtime, file)
+File.utime(Time.new, File.stat("testfile").mtime, file)
+
+#-----------------------------
+#!/usr/bin/ruby -w
+## uvi - vi a file without changing it's access times
+
+if ARGV.length != 1
+ puts "usage: uvi filename"
+ exit
+end
+file = ARGV[0]
+atime, mtime = File.stat(file).atime, File.stat(file).mtime
+system(ENV["EDITOR"] || "vi", file)
+File.utime(atime, mtime, file)
+#-----------------------------
+
+
+# @@PLEAC@@_9.2
+File.unlink(FILENAME)
+
+err_flg = false
+filenames.each do |file|
+ begin
+ File.unlink(file)
+ rescue
+ err_flg = $!
+ end
+end
+err_flg and raise "Couldn't unlink all of #{filenames.join(" ")}: #{err_flg}"
+
+File.unlink(file)
+
+count = filenames.length
+filenames.each do |file|
+ begin
+ File.unlink(file)
+ rescue
+ count -= 1
+ end
+end
+if count != filenames.length
+ STDERR.puts "could only delete #{count} of #{filenames.length} files"
+end
+
+
+# @@PLEAC@@_9.3
+require "ftools"
+File.copy(oldfile, newfile)
+
+infile = File.open(oldfile, "r")
+outfile = File.open(newfile, "w")
+
+blksize = infile.stat.blksize
+# This doesn't handle partial writes or ^Z
+# like the Perl version does.
+while (line = infile.read(blksize))
+ outfile.write(line)
+end
+
+infile.close
+outfile.close
+
+system("cp #{oldfile} #{newfile}") # unix
+system("copy #{oldfile} #{newfile}") # dos, vms
+
+require "ftools"
+File.copy("datafile.dat", "datafile.bak")
+File.move("datafile.new", "datafile.dat")
+
+
+# @@PLEAC@@_9.4
+$seen = {} # must use global var to be seen inside of method below
+
+def do_my_thing(filename)
+ dev, ino = File.stat(filename).dev, File.stat(filename).ino
+ unless $seen[[dev, ino]]
+ # do something with $filename because we haven't
+ # seen it before
+ end
+ $seen[[dev, ino]] = $seen[[dev, ino]].to_i + 1
+end
+
+files.each do |filename|
+ dev, ino = File.stat(filename).dev, File.stat(filename).ino
+ if !$seen.has_key?([dev, ino])
+ $seen[[dev, ino]] = []
+ end
+ $seen[[dev, ino]].push(filename)
+end
+
+$seen.keys.sort.each do |devino|
+ ino, dev = devino
+ if $seen[devino].length > 1
+ # $seen[devino] is a list of filenames for the same file
+ end
+end
+
+
+# @@PLEAC@@_9.5
+Dir.open(dirname) do |dir|
+ dir.each do |file|
+ # do something with dirname/file
+ puts file
+ end
+end
+# Dir.close is automatic
+
+# No -T equivalent in Ruby
+
+dir.each do |file|
+ next if file =~ /^\.\.?$/
+ # ...
+end
+
+def plainfiles(dir)
+ dh = Dir.open(dir)
+ dh.entries.grep(/^[^.]/).
+ map {|file| "#{dir}/#{file}"}.
+ find_all {|file| test(?f, file)}.
+ sort
+end
+
+
+# @@PLEAC@@_9.6
+list = Dir.glob("*.c")
+
+dir = Dir.open(path)
+files = dir.entries.grep(/\.c$/)
+dir.close
+
+files = Dir.glob("*.c")
+files = Dir.open(path).entries.grep(/\.[ch]$/i)
+
+dir = Dir.new(path)
+files = dir.entries.grep(/\.[ch]$/i)
+
+begin
+ d = Dir.open(dir)
+rescue Errno::ENOENT
+ raise "Couldn't open #{dir} for reading: #{$!}"
+end
+
+files = []
+d.each do |file|
+ puts file
+ next unless file =~ /\.[ch]$/i
+
+ filename = "#{dir}/#{file}"
+ # There is no -T equivalent in Ruby, but we can still test emptiness
+ files.push(filename) if test(?s, filename)
+end
+
+dirs.entries.grep(/^\d+$/).
+ map { |file| [file, "#{path}/#{file}"]} .
+ select { |file| test(?d, file[1]) }.
+ sort { |a,b| a[0] <=> b[0] }.
+ map { |file| file[1] }
+
+
+# @@PLEAC@@_9.7
+require 'find'
+Find.find(dirlist) do |file|
+ # do whatever
+end
+
+require 'find'
+argv = ARGV.empty? ? %w{.} : ARGV
+Find.find(*argv) do |file|
+ print file, (test(?d, file) ? "/\n" : "\n")
+end
+
+require 'find'
+argv = ARGV.empty? ? %w{.} : ARGV
+sum = 0
+Find.find(*argv) do |file|
+ size = test(?s, file) || 0
+ sum += size
+end
+puts "#{argv.join(' ')} contains #{sum} bytes"
+
+require 'find'
+argv = ARGV.empty? ? %w{.} : ARGV
+saved_size, saved_name = -1, ""
+Find.find(*argv) do |file|
+ size = test(?s, file) || 0
+ next unless test(?f, file) && size > saved_size
+ saved_size = size
+ saved_name = file
+end
+puts "Biggest file #{saved_name} in #{argv.join(' ')} is #{saved_size}"
+
+require 'find'
+argv = ARGV.empty? ? %w{.} : ARGV
+age, name = nil
+Find.find(*argv) do |file|
+ mtime = File.stat(file).mtime
+ next if age && age > mtime
+ age = mtime
+ name = file
+end
+puts "#{name} #{age}"
+
+#-----------------------------
+#!/usr/bin/ruby -w
+# fdirs - find all directories
+require 'find'
+argv = ARGV.empty? ? %w{.} : ARGV
+# the traversal method lives in the Find module (from require 'find'), not in File
+Find.find(*argv) { |file| puts file if test(?d, file) }
+#-----------------------------
+
+
+# @@PLEAC@@_9.8
+require 'fileutils'
+
+puts "Usage #{$0} dir ..." if ARGV.empty?
+ARGV.each do |dir|
+ FileUtils.rmtree(dir)
+end
+
+
+# @@PLEAC@@_9.9
+require 'ftools'
+names.each do |file|
+ newname = file
+ begin
+ File.move(file, newname)
+ rescue Errno::EPERM
+ $stderr.puts "Couldn't rename #{file} to #{newname}: #{$!}"
+ end
+end
+
+require 'ftools'
+op = ARGV.empty? ? (raise "Usage: rename expr [files]\n") : ARGV.shift
+argv = ARGV.empty? ? $stdin.readlines.map { |f| f.chomp } : ARGV
+argv.each do |file|
+ was = file
+ file = eval("file.#{op}")
+ File.move(was, file) unless was == file
+end
+
+
+# @@PLEAC@@_9.10
+base = File.basename(path)
+dir = File.dirname(path)
+# ruby has no fileparse equivalent
+dir, base = File.split(path)
+ext = base.scan(/\..*$/).to_s
+
+path = '/usr/lib/libc.a'
+file = File.basename(path)
+dir = File.dirname(path)
+
+puts "dir is #{dir}, file is #{file}"
+# dir is /usr/lib, file is libc.a
+
+path = '/usr/lib/libc.a'
+dir, filename = File.split(path)
+name, ext = filename.split(/(?=\.)/)
+puts "dir is #{dir}, name is #{name}, ext is #{ext}"
+# NOTE: The Ruby code prints
+# dir is /usr/lib, name is libc, extension is .a
+# while the Perl code prints a '/' after the directory name
+# dir is /usr/lib/, name is libc, extension is .a
+
+# No fileparse_set_fstype() equivalent in ruby
+
+def extension(path)
+ ext = path.scan(/\..*$/).to_s
+ ext.sub(/^\./, "")
+end
+
+
+# @@PLEAC@@_9.11
+#-----------------------------
+#!/usr/bin/ruby -w
+# symirror - build spectral forest of symlinks
+
+require 'find'
+require 'fileutils'
+
+raise "usage: #{$0} realdir mirrordir" unless ARGV.size == 2
+
+srcdir,dstdir = ARGV
+srcmode = File::stat(srcdir).mode
+Dir.mkdir(dstdir, srcmode & 07777) unless test(?d, dstdir)
+
+# fix relative paths
+Dir.chdir(srcdir) {srcdir = Dir.pwd}
+Dir.chdir(dstdir) {dstdir = Dir.pwd}
+
+Find.find(srcdir) do |srcfile|
+ if test(?d, srcfile)
+ dest = srcfile.sub(/^#{srcdir}/, dstdir)
+ dmode = File::stat(srcfile).mode & 07777
+ Dir.mkdir(dest, dmode) unless test(?d, dest)
+ a = Dir["#{srcfile}/*"].reject{|f| test(?d, f)}
+ FileUtils.ln_s(a, dest)
+ end
+end
+
+
+# @@PLEAC@@_9.12
+# we use the Getopt/Declare library here for convenience:
+# http://raa.ruby-lang.org/project/getoptdeclare/
+#-----------------------------
+#!/usr/bin/ruby -w
+# lst - list sorted directory contents (depth first)
+
+require 'find'
+require 'etc'
+require "Getopt/Declare"
+
+# Note: in the option-spec below there must by at least one hard
+# tab in between each -option and its description. For example
+# -i <tab> read from stdin
+
+opts = Getopt::Declare.new(<<'EOPARAM')
+ ============
+ Input Format:
+ -i read from stdin
+ ============
+ Output Format:
+ -l long listing
+ -r reverse listing
+ ============
+ Sort on: (one of)
+ -m mtime (modify time - default)
+ {$sort_criteria = :mtime}
+ -u atime (access time)
+ {$sort_criteria = :atime}
+ -c ctime (inode change time)
+ {$sort_criteria = :ctime}
+ -s size
+ {$sort_criteria = :size}
+ [mutex: -m -u -c -s]
+
+EOPARAM
+
+$sort_criteria ||= :mtime
+files = {}
+# chomp, not chomp!: the bang version returns nil for a line that has no
+# trailing newline, which would put nil into the directory list
+DIRS = opts['-i'] ? $stdin.readlines.map{|f| f.chomp} : ARGV
+DIRS.each do |dir|
+ Find.find(dir) do |ent|
+ files[ent] = File::stat(ent)
+ end
+end
+entries = files.keys.sort_by{|f| files[f].send($sort_criteria)}
+entries = entries.reverse unless opts['-r']
+
+entries.each do |ent|
+ unless opts['-l']
+ puts ent
+ next
+ end
+ stats = files[ent]
+ ftime = stats.send($sort_criteria == :size ? :mtime : $sort_criteria)
+ printf "%6d %04o %6d %8s %8s %8d %s %s\n",
+ stats.ino,
+ stats.mode & 07777,
+ stats.nlink,
+ ETC::PASSWD[stats.uid].name,
+ ETC::GROUP[stats.gid].name,
+ stats.size,
+ ftime.strftime("%a %b %d %H:%M:%S %Y"),
+ ent
+end
+
+
+# @@PLEAC@@_10.0
+def hello
+ $greeted += 1 # in Ruby, a variable beginning with $ is global (can be any type of course)
+ puts "hi there!"
+end
+
+# We need to initialize $greeted before it can be used, because "+=" is waiting a Numeric object
+$greeted = 0
+hello # note that appending () is optional to function calls with no parameters
+
+
+# @@PLEAC@@_10.1
+# In Ruby, parameters are named anyway
+def hypotenuse(side1, side2)
+ Math.sqrt(side1**2 + side2**2) # the sqrt function comes from the Math module
+end
+diag = hypotenuse(3, 4)
+
+puts hypotenuse(3, 4)
+
+a = [3, 4]
+print hypotenuse(*a) # the star operator will magically convert an Array into a "tuple"
+
+both = men + women
+
+# In Ruby, all objects are references, so the same problem arises; we then return a new object
+nums = [1.4, 3.5, 6.7]
+def int_all(n)
+ n.collect { |v| v.to_i }
+end
+ints = int_all(nums)
+
+nums = [1.4, 3.5, 6.7]
+def trunc_em(n)
+ n.collect! { |v| v.to_i } # the bang-version of collect modifies the object
+end
+trunc_em(nums)
+
+# Ruby has two chomp version:
+# ``chomp'' chomps the record separator and returns what's expected
+# ``chomp!'' does the same but also modifies the parameter object
+
+
+# @@PLEAC@@_10.2
+def somefunc
+ variable = something # variable is local by default
+end
+
+name, age = ARGV
+start = fetch_time
+
+a, b = pair # will succeed if pair is an Array object (like ARGV is)
+c = fetch_time
+
+# In ruby, run_check can't access a, b, or c until they are
+# explicitely defined global (using leading $), even if they are
+# both defined in the same scope
+
+def check_x(x)
+ y = "whatever"
+ run_check
+ if $condition
+ puts "got $x"
+ end
+end
+
+# The following will keep a reference to the array, though the
+# results will be slightly different from perl: the last element
+# of $global_array will be itself an array
+def save_array(ary)
+ $global_array << ary
+end
+
+# The following gives the same results as in Perl for $global_array,
+# though it doesn't illustrate anymore the way to keep a reference
+# to an object: $global_array is extended with the elements of ary
+def save_array(ary)
+ $global_array += ary
+end
+
+
+# @@PLEAC@@_10.3
+# In Ruby, AFAIK a method cannot access "local variables" defined
+# upper scope; mostly because everything is an object, so you'll
+# do the same by defining an attribute or a static attribute
+
+# In Ruby the BEGIN also exists:
+BEGIN { puts "hello from BEGIN" }
+puts "hello from main"
+BEGIN { puts "hello from 2nd BEGIN" }
+# gives:
+# hello from BEGIN
+# hello from 2nd BEGIN
+# hello from main
+
+# In Ruby, it can be written as a static method and a static
+# variable
+class Counter
+ @@counter = 0
+ def Counter.next_counter; @@counter += 1; end
+end
+
+# There is no need of BEGIN since the variable will get
+# initialized when parsing
+class Counter
+ @@counter = 42
+ def Counter.next_counter; @@counter += 1; end
+ def Counter.prev_counter; @@counter -= 1; end
+end
+
+
+# @@PLEAC@@_10.4
+# You can either get the whole trace as an array of strings, each
+# string telling which file, line and method is calling:
+caller
+
+# ...or only the last caller
+caller[0]
+
+# We need to extract just the method name of the backtrace:
+def whoami; caller()[0] =~ /in `([^']+)'/ ? $1 : '(anonymous)'; end
+def whowasi; caller()[1] =~ /in `([^']+)'/ ? $1 : '(anonymous)'; end
+
+
+# @@PLEAC@@_10.5
+# In Ruby, every value is a reference on an object, thus there is
+# no such problem
+array_diff(array1, array2)
+
+def add_vecpair(a1, a2)
+ results = []
+ a1.each_index { |i| results << (a1[i] + a2[i]) }
+ results
+end
+a = [1, 2]
+b = [5, 8]
+c = add_vecpair(a, b)
+p c
+
+# Add this to the beginning of the function to check if we were
+# given two arrays
+# Object#type was removed from Ruby; Object#class is the replacement
+a1.class == Array && a2.class == Array or
+  raise "usage: add_vecpair array1 array2 (was used with: #{a1.class} #{a2.class})"
+
+
+# @@PLEAC@@_10.6
+# There is no return context in Ruby
+
+
+# @@PLEAC@@_10.7
+# Like in Perl, we need to fake with a hash, but it's dirty :-(
+def thefunc(param_args)
+ args = { 'INCREMENT' => '10s', 'FINISH' => '0', 'START' => 0 }
+ args.update(param_args)
+ if (args['INCREMENT'] =~ /m$/ )
+ # .....
+ end
+end
+
+thefunc({ 'INCREMENT' => '20s', 'START' => '+5m', 'FINISH' => '+30m' })
+thefunc({})
+
+
+# @@PLEAC@@_10.8
+# there is no "undef" direct equivalent but there is the slice equiv:
+a, c = func.indexes(0, 2)
+
+
+# @@PLEAC@@_10.9
+# Ruby has no such limitation:
+def somefunc
+ ary = []
+ hash = {}
+ # ...
+ return ary, hash
+end
+arr, dict = somefunc
+
+array_of_hashes = fn
+h1, h2, h3 = fn
+
+
+# @@PLEAC@@_10.10
+return
+# or (equivalent)
+return nil
+
+
+# @@PLEAC@@_10.11
+# You can't prototype in Ruby regarding types :-(
+# Though, you can force the number of arguments:
+def func_with_no_arg; end
+def func_with_no_arg(); end
+def func_with_one_arg(a1); end
+def func_with_two_args(a1, a2); end
+def func_with_any_number_of_args(*args); end
+
+
+# @@PLEAC@@_10.12
+raise "some message" # raise exception
+
+begin
+ val = func
+rescue Exception => msg
+ $stderr.puts "func raised an exception: #{msg}"
+end
+
+# In Ruby the rescue statement uses an exception class, every
+# exception which is not matched is still continuing
+begin
+ val = func
+rescue FullMoonError
+ ...
+end
+
+
+# @@PLEAC@@_10.13
+# Saving Global Values
+# Of course we can just save the value and restore it later:
+def print_age
+ puts "Age is #{$age}"
+end
+
+$age = 18 # global variable
+print_age()
+if condition
+ safeage = $age
+ $age = 23
+ print_age()
+ $age = safeage
+end
+
+# We can also use a method that saves the global variable and
+# restores it automatically when the block is left:
+
+def local(var)
+ eval("save = #{var.id2name}")
+ begin
+ result = yield
+ ensure
+ # we want to call this even if we got an exception
+ eval("#{var.id2name} = save")
+ end
+ result
+end
+
+condition = true
+$age = 18
+print_age()
+if condition
+ local(:$age) {
+ $age = 23
+ print_age()
+ }
+end
+print_age()
+
+# There is no need to use local() for filehandles or directory
+# handles in ruby because filehandles are normal objects.
+
+
+# @@PLEAC@@_10.14
+# In Ruby you may redefine a method [but not overload it :-(]
+# just by defining again with the same name.
+def foo; puts 'foo'; end
+def foo; puts 'bar'; end
+foo
+#=> bar
+
+# You can also take a reference to an existing method before
+# redefining a new one, using the `alias' keyword
+def foo; puts 'foo'; end
+alias foo_orig foo
+def foo; puts 'bar'; end
+foo_orig
+foo
+#=> foo
+#=> bar
+
+# AFAIK, there is no direct way to create a new method whose name
+# comes from a variable, so use "eval"
+colors = %w(red blue green yellow orange purple violet)
+colors.each { |c|
+ eval <<-EOS
+ def #{c}(*a)
+ "<FONT COLOR='#{c}'>" + a.to_s + "</FONT>"
+ end
+ EOS
+}
+
+
+# @@PLEAC@@_10.15
+def method_missing(name, *args)
+ "<FONT COLOR='#{name}'>" + args.join(' ') + "</FONT>"
+end
+puts chartreuse("stuff")
+
+
+# @@PLEAC@@_10.16
+def outer(arg)
+ x = arg + 35
+ inner = proc { x * 19 }
+ x + inner.call()
+end
+
+
+# @@PLEAC@@_10.17
+#!/usr/bin/ruby -w
+# mailsort - sort mbox by different criteria
+require 'English'
+require 'Date'
+
+# Objects of class Mail represent a single mail.
+class Mail
+ attr_accessor :no
+ attr_accessor :subject
+ attr_accessor :fulltext
+ attr_accessor :date
+
+ def initialize
+ @fulltext = ""
+ @subject = ""
+ end
+
+ def append(para)
+ @fulltext << para
+ end
+
+ # this is called if you call puts(mail)
+ def to_s
+ @fulltext
+ end
+end
+
+# represents a list of mails.
+class Mailbox < Array
+
+ Subjectpattern = Regexp.new('Subject:\s*(?:Re:\s*)*(.*)\n')
+ Datepattern = Regexp.new('Date:\s*(.*)\n')
+
+ # reads mails from open file and stores them
+ def read(file)
+ $INPUT_RECORD_SEPARATOR = '' # paragraph reads
+ msgno = -1
+ file.each { |para|
+ if para =~ /^From/
+ mail = Mail.new
+ mail.no = (msgno += 1)
+ md = Subjectpattern.match(para)
+ if md
+ mail.subject = md[1]
+ end
+ md = Datepattern.match(para)
+ if md
+ mail.date = DateTime.parse(md[1])
+ else
+ mail.date = DateTime.now
+ end
+ self.push(mail)
+ end
+ mail.append(para) if mail
+ }
+ end
+
+ def sort_by_subject_and_no
+ self.sort_by { |m|
+ [m.subject, m.no]
+ }
+ end
+
+ # sorts by a list of attributs of mail, given as symbols
+ def sort_by_attributs(*attrs)
+ # you can sort an Enumerable by an array of
+ # values, they would be compared
+ # from ary[0] to ary[n]t, say:
+ # ['b',1] > ['a',10] > ['a',9]
+ self.sort_by { |elem|
+ attrs.map { |attr|
+ elem.send(attr)
+ }
+ }
+ end
+
+end
+
+mailbox = Mailbox.new
+mailbox.read(ARGF)
+
+# print only subjects sorted by subject and number
+for m in mailbox.sort_by_subject_and_no
+ puts(m.subject)
+end
+
+# print complete mails sorted by date, then subject, then number
+for m in mailbox.sort_by_attributs(:date, :subject)
+ puts(m)
+end
+
+
+# @@PLEAC@@_11.7
+def mkcounter(count)
+ start = count
+ bundle = {
+ "NEXT" => proc { count += 1 },
+ "PREV" => proc { count -= 1 },
+ "RESET" => proc { count = start }
+ }
+ bundle["LAST"] = bundle["PREV"]
+ return bundle
+end
+
+c1 = mkcounter(20)
+c2 = mkcounter(77)
+
+puts "next c1: #{c1["NEXT"].call}" # 21
+puts "next c2: #{c2["NEXT"].call}" # 78
+puts "next c1: #{c1["NEXT"].call}" # 22
+puts "last c1: #{c1["PREV"].call}" # 21
+puts "last c1: #{c1["LAST"].call}" # 20
+puts "old c2: #{c2["RESET"].call}" # 77
+
+
+# @@PLEAC@@_11.15
+class Binary_tree
+ def initialize(val)
+ @value = val
+ @left = nil
+ @right = nil
+ end
+
+ # insert given value into proper point of
+ # provided tree. If no tree provided,
+ # use implicit pass by reference aspect of @_
+ # to fill one in for our caller.
+ def insert(val)
+ if val < @value then
+ if @left then
+ @left.insert(val)
+ else
+ @left = Binary_tree.new(val)
+ end
+ elsif val > @value then
+ if @right then
+ @right.insert(val)
+ else
+ @right = Binary_tree.new(val)
+ end
+ else
+ puts "double"
+ # do nothing, no double values
+ end
+ end
+
+ # recurse on left child,
+ # then show current value,
+ # then recurse on right child.
+ def in_order
+ @left.in_order if @left
+ print @value, " "
+ @right.in_order if @right
+ end
+
+ # show current value,
+ # then recurse on left child,
+ # then recurse on right child.
+ def pre_order
+ print @value, " "
+ @left.pre_order if @left
+ @right.pre_order if @right
+ end
+
+ # recurse on left child,
+ # then recurse on right child,
+ # then show current value.
+ def post_order
+ @left.post_order if @left
+ @right.post_order if @right
+ print @value, " "
+ end
+
+ # find out whether provided value is in the tree.
+ # if so, return the node at which the value was found.
+ # cut down search time by only looking in the correct
+ # branch, based on current value.
+ def search(val)
+ if val == @value then
+ return self
+ elsif val < @value then
+ return @left.search(val) if @left
+ return nil
+ else
+ return @right.search(val) if @right
+ return nil
+ end
+ end
+end
+
+# first generate 20 random inserts
+test = Binary_tree.new(0)
+# exactly 20 inserts; the range 0..20 would have run 21 times
+20.times { test.insert(rand(1000)) }
+
+# now dump out the tree all three ways
+print "Pre order: "; test.pre_order; puts ""
+print "In order: "; test.in_order; puts ""
+print "Post order: "; test.post_order; puts ""
+
+print "search?"
+while gets
+ print test.search($_.to_i)
+ print "\nsearch?"
+end
+
+
+# @@PLEAC@@_12.0
+# class and module names need to have the first letter capitalized
+module Alpha
+ NAME = 'first'
+end
+module Omega
+ NAME = 'last'
+end
+puts "Alpha is #{Alpha::NAME}, Omega is #{Omega::NAME}"
+
+# ruby doesn't differentiate beteen compile-time and run-time
+require 'getoptlong.rb'
+require 'getoptlong' # assumes the .rb
+require 'cards/poker.rb'
+require 'cards/poker' # assumes the .rb
+load 'cards/poker' # require only loads the file once
+
+module Cards
+ module Poker
+ @card_deck = Array.new # or @card_deck = []
+ def shuffle
+ end
+ end
+end
+
+
+# @@PLEAC@@_12.1
+# a module exports all of its functions
+module Your_Module
+ def self.function
+ # this would be called as Your_Module.function
+ end
+
+ def Your_Module.another
+ # this is the same as above, but more specific
+ end
+end
+
+# @@PLEAC@@_12.2
+begin
+ require 'nonexistent'
+rescue LoadError
+ puts "Couldn't load #{$!}" # $! contains the last error string
+end
+
+# @@PLEAC@@_12.4
+# module variables are private unless access functions are defined
+module Alpha
+ @aa = 10
+ @bb = 11
+
+ def self.put_aa
+ puts @aa
+ end
+
+ def self.bb=(val)
+ @bb = val
+ end
+end
+
+Alpha.bb = 12
+# Alpha.aa = 10 # error, no aa=method
+
+
+# @@PLEAC@@_12.5
+# caller provides a backtrace of the call stack
+module MyModule
+ def find_caller
+ caller
+ end
+
+ def find_caller2(i)
+ caller(i) # an argument limits the size of the stack returned
+ end
+end
+
+
+# @@PLEAC@@_12.6
+BEGIN {
+ $logfile = '/tmp/mylog' unless defined? $logfile
+ $LF = File.open($logfile, 'a')
+}
+
+module Logger
+ def self.logmsg(msg)
+ $LF.puts msg
+ end
+
+ logmsg('startup')
+end
+
+END {
+ Logger::logmsg('shutdown')
+ $LF.close
+}
+
+
+# @@PLEAC@@_12.7
+#-----------------------------
+# results may be different on your system
+# % ruby -e "$LOAD_PATH.each_index { |i| printf("%d %s\n", i, $LOAD_PATH[i] }
+#0 /usr/local/lib/site_ruby/1.6
+#1 /usr/local/lib/site_ruby/1.6/i386-linux
+#2 /usr/local/lib/site_ruby/
+#3 /usr/lib/ruby/1.6
+#4 /usr/lib/ruby/1.6/i136-linux
+#5 .
+#-----------------------------
+# syntax for sh, bash, ksh, or zsh
+#$ export RUBYLIB=$HOME/rubylib
+
+# syntax for csh or tcsh
+# % setenv RUBYLIB ~/rubylib
+#-----------------------------
+$LOAD_PATH.unshift "/projects/spectre/lib";
+
+
+# @@PLEAC@@_12.8
+# equivalents in ruby are mkmf, SWIG, or Ruby/DL depending on usage
+
+
+# @@PLEAC@@_12.9
+# no equivalent in ruby
+
+
+# @@PLEAC@@_12.10
+# no equivalent in ruby
+
+
+# @@PLEAC@@_12.11
+module FineTime
+ def self.time
+ # to be defined later
+ end
+end
+
+
+module FineTime
+ def self.time
+ "its a fine time"
+ end
+end
+
+puts FineTime.time #=> "its a fine time"
+
+
+# @@PLEAC@@_12.12
+def even_only(n)
+ raise "#{n} is not even" if (n & 1) != 0 # one way to test
+ # ...
+end
+def even_only(n)
+ $stderr.puts "#{n} is not even" if (n & 1) != 0
+ # ...
+end
+
+
+# @@PLEAC@@_12.17
+# The library archive for ruby is called Ruby Application archive,
+# or shorter RAA, and can be found at http://raa.ruby-lang.org.
+# A typical library is installed like this:
+# % gunzip some-module-4.54.tar.gz
+# % tar xf some-module-4.54.tar
+# % cd some-module-4.54.tar
+# % ruby install.rb config
+# % ruby install.rb setup
+# get superuser previleges here if needed for next step
+# % ruby install.rb install
+
+# Some modules use a different process,
+# you should find details in the documentation
+# Here is an example of such a different process
+# % ruby extconf.rb
+# % make
+# % make install
+
+# If you want the module installed in your own directory:
+# For ruby version specific libraries
+# % ruby install.rb config --site-ruby=~/lib
+# For version independent libraries
+# % ruby install.rb config --site-ruby-common=~/lib
+
+# Information about possible options for config
+# % ruby install.rb --help
+
+# If you have your own complete distribution
+# % ruby install.rb --prefix=path=~/ruby-private
+
+
+# @@PLEAC@@_13.0
+# Classes and objects in Ruby are rather straigthforward
+class Person
+ # Class variables (also called static attributes) are prefixed by @@
+ @@person_counter=0
+
+ # object constructor
+ def initialize(age, name, alive = true) # Default arg like in C++
+ @age, @name, @alive = age, name, alive # Object attributes are prefixed by '@'
+ @@person_counter += 1
+ # There is no '++' operator in Ruby. The '++'/'--' operators are in fact
+ # hidden assignments which affect variables, not objects. You cannot accomplish
+ # assignment via method. Since everything in Ruby is object, '++' and '--'
+ # contradict Ruby OO ideology. Instead '-=' and '+=' are used.
+ end
+
+ attr_accessor :name, :age # This creates setter and getter methods for @name
+ # and @age. See 13.3 for detailes.
+
+ # methods modifying the receiver object usually have the '!' suffix
+ def die!
+ @alive = false
+ puts "#{@name} has died at the age of #{@age}."
+ @alive
+ end
+
+ def kill(anotherPerson)
+ print @name, ' is killing ', anotherPerson.name, ".\n"
+ anotherPerson.die!
+ end
+
+ # methods used as queries
+ # usually have the '?' suffix
+ def alive?
+ @alive && true
+ end
+
+ def year_of_birth
+ Time.now.year - @age
+ end
+
+ # Class method (also called static method)
+ def Person.number_of_people
+ @@person_counter
+ end
+end
+
+# Using the class:
+# Create objects of class Person
+lecter = Person.new(47, 'Hannibal')
+starling = Person.new(29, 'Clarice', true)
+pazzi = Person.new(40, 'Rinaldo', true)
+
+# Calling a class method
+print "There are ", Person.number_of_people, " Person objects\n"
+
+print pazzi.name, ' is ', (pazzi.alive?) ? 'alive' : 'dead', ".\n"
+lecter.kill(pazzi)
+print pazzi.name, ' is ', (pazzi.alive?) ? 'alive' : 'dead', ".\n"
+
+print starling.name , ' was born in ', starling.year_of_birth, "\n"
+
+
+# @@PLEAC@@_13.1
+# If you don't need any initialisation in the constructor,
+# you don't need to write a constructor.
+class MyClass
+end
+
+class MyClass
+ def initialize
+ @start = Time.new
+ @age = 0
+ end
+end
+
+class MyClass
+ def initialize(inithash)
+ @start = Time.new
+ @age = 0
+ for key, value in inithash
+ instance_variable_set("@#{key}", value)
+ end
+ end
+end
+
+# @@PLEAC@@_13.2
+# Objects are destroyed by the garbage collector.
+# The time of destroying is not predictable.
+# The ruby garbage collector can handle circular references,
+# so there is no need to write destructor for that.
+
+# There is no direct support for destructor.
+# You can call a custom function, or more specific a proc object, when the
+# garbage collector is about to destruct the object, but it is unpredictable
+# when this occurs.
+# Also if such a finalizer object has a reference to the orignal object,
+# this may prevent the original object to get garbage collected.
+# Because of this problem the finalize method below is
+# a class method and not a instance method.
+# So if you need to free resources for an object, like
+# closing a socket or kill a spawned subprocess,
+# you should do it explicitly.
+
+class MyClass
+ def initialize
+ ObjectSpace.define_finalizer(self,
+ self.class.method(:finalize).to_proc)
+ end
+ def MyClass.finalize(id)
+ puts "Object #{id} dying at #{Time.new}"
+ end
+end
+
+# test code
+3.times {
+ MyClass.new
+}
+ObjectSpace.garbage_collect
+
+
+# @@PLEAC@@_13.3
+# You can write getter and setter methods in a natural way:
+class Person
+ def name
+ @name
+ end
+ def name=(name)
+ @name = name
+ end
+end
+
+# But there is a better and shorter way
+class Person
+ attr_reader :age
+ attr_writer :name
+ # attr_reader and attr_writer are actually methods in class Class
+ # which set getter and setter methods for you.
+end
+
+# There is also attr_accessor to create both setters and getters
+class Person
+ attr_accessor :age, :name
+end
+
+
+# @@PLEAC@@_13.4
+class Person
+ # Class variables (also called static attributes) are prefixed by @@
+ @@person_counter = 0
+
+ def Person.population
+ @@person_counter
+ end
+ def initialize
+ @@person_counter += 1
+ ObjectSpace.define_finalizer(self,
+ self.class.method(:finalize).to_proc)
+ end
+ def Person.finalize(id)
+ @@person_counter -= 1
+ end
+end
+people = []
+10.times {
+ people.push(Person.new)
+}
+printf("There are %d people alive", Person.population)
+
+
+FixedArray.class_max_bounds = 100
+alpha = FixedArray.new
+puts "Bound on alpha is #{alpha.max_bounds}"
+
+beta = FixedArray.new
+beta.max_bounds = 50 # calls the instance method
+beta.class.class_max_bounds = 50 # alternative, calls the class method
+puts "Bound on alpha is #{alpha.max_bounds}"
+
+class FixedArray
+ @@bounds = 7
+
+ def max_bounds
+ @@max_bounds
+ end
+ # instance method, which sets the class variable
+ def max_bounds=(value)
+ @@max_bounds = value
+ end
+ # class method. This can only be called on a class,
+ # but not on the instances
+ def FixedArray.class_max_bounds=(value)
+ @@max_bounds = value
+ end
+end
+
+
+# @@PLEAC@@_13.5
+PersonStruct = Struct.new("Person", :name, :age, :peers)
+# creates a class "Person::Struct", which is accessiable with the
+# constant "PersonStruct"
+p = PersonStruct.new
+p = Struct::Person.new # alternative using the classname
+p.name = "Jason Smythe"
+p.age = 13
+p.peers = ["Wilbur", "Ralph", "Fred"]
+p[:peers] = ["Wilbur", "Ralph", "Fred"] # alternative access using symbol
+p["peers"] = ["Wilbur", "Ralph", "Fred"] # alternative access using name of field
+p[2] = ["Wilbur", "Ralph", "Fred"] # alternative access using index of field
+puts "At age #{p.age}, #{p.name}'s first friend is #{p.peers[0]}"
+
+# The fields of a struct have no special type, like other ruby variables
+# you can put any objects in. Therefore the discussions how to specify
+# the types of the fields do not apply to ruby.
+
+FamilyStruct = Struct.new("Family", :head, :address, :members)
+folks = FamilyStruct.new
+folks.head = PersonStruct.new
+dad = folks.head
+dad.name = "John"
+dad.age = 34
+
+# supply of own accessor method for the struct for error checking
+class PersonStruct
+ def age=(value)
+ if !value.kind_of?(Integer)
+ raise(ArgumentError, "Age #{value} isn't an Integer")
+ elsif value > 150
+ raise(ArgumentError, "Age #{value} is unreasonable")
+ end
+ @age = value
+ end
+end
+
+
+# @@PLEAC@@_13.6
+# The ruby Object class defines a dup and a clone method.
+# The dup method is recommended for prototype object creation.
+# The default implementation makes a shallow copy,
+# but each class can override it, for example to make a deep copy.
+
+# If you want to call 'new' directly on the instances,
+# you can create a instance method "new", which returns a new duplicate.
+# This method is distinct from the class method new.
+#
+class A
+ def new
+ dup
+ end
+end
+
+ob1 = A.new
+# later on
+ob2 = ob1.new
+
+
+# @@PLEAC@@_13.7
+methname = 'flicker'
+obj.send(methname, 10) # calls obj.flicker(10)
+
+# call three methods on the object, by name
+['start', 'run', 'stop'].each do |method_string|
+ obj.send(method_string)
+end
+
+# Another way is to create a Method object
+method_obj = obj.method('flicker')
+# And then call it
+method_obj.call(10)
+
+
+# @@PLEAC@@_13.8
+# All classes in Ruby inherit from class Object
+# and thus all objects share methods defined in this class
+
+# the class of the object
+puts any_object.class # Object#type was removed from Ruby; class is the replacement
+
+# Ruby classes are actually objects of class Class and they
+# respond to methods defined in Object class as well
+
+# the superclass of this class
+puts any_object.class.superclass
+
+# ask an object whether it is an instance of particular class
+n = 4.7
+puts n.instance_of?(Float) # true
+puts n.instance_of?(Numeric) # false
+
+# ask an object whether it is an instance of class, one of the
+# superclasses of the object, or modules included in it
+puts n.kind_of?(Float) # true (the class)
+puts n.kind_of?(Numeric) # true (an ancestor class)
+puts n.kind_of?(Comparable) # true (a mixin module)
+puts n.kind_of?(String) # false
+
+# ask an object whether it can respond to a particular method
+puts n.respond_to?('+') # true
+puts n.respond_to?('length') # false
+
+# all methods an object can respond to
+'just a string'.methods.each { |m| puts m }
+
+
+# @@PLEAC@@_13.9
+# Actually any class in Ruby is inheritable
+class Person
+ attr_accessor :age, :name
+ def initialize
+ @name
+ @age
+ end
+end
+#-----------------------------
+dude = Person.new
+dude.name = 'Jason'
+dude.age = 23
+printf "%s is age %d.\n", dude.name, dude.age
+#-----------------------------
+# Inheriting from Person
+class Employee < Person
+ attr_accessor :salary
+end
+#-----------------------------
+empl = Employee.new
+empl.name = 'Jason'
+empl.age = 23
+empl.salary = 200
+printf "%s is age %d, the salary is %d.\n", empl.name, empl.age, empl.salary
+#-----------------------------
+# Any built-in class can be inherited the same way
+class WeirdString < String
+ def initialize(obj)
+ super obj
+ end
+ def +(anotherObj) # + method in this class is overridden
+ # to return the sum of string lengths
+ self.length + anotherObj.length # 'self' can be omitted
+ end
+end
+#-----------------------------
+a = WeirdString.new('hello')
+b = WeirdString.new('bye')
+
+puts a + b # the overridden +
+#=> 8
+puts a.length # method from the superclass, String
+#=> 5
+
+
+# @@PLEAC@@_13.11
+# In ruby you can override the method_missing method
+# to have a solution similar to perls AUTOLOAD.
+class Person
+
+ def initialize
+ @ok_fields = %w(name age peers parent)
+ end
+
+ def valid_attribute?(name)
+ @ok_fields.include?(name)
+ end
+
+ def method_missing(namesymbol, *params)
+ name = namesymbol.to_s
+ return if name =~ /^A-Z/
+ if name.to_s[-1] == ('='[0]) # we have a setter
+ isSetter = true
+ name.sub!(/=$/, '')
+ end
+ if valid_attribute?(name)
+ if isSetter
+ instance_variable_set("@#{name}", *params)
+ else
+ instance_variable_get("@#{name}", *params)
+ end
+ else
+ # if no annestor is responsible,
+ # the Object class will throw a NoMethodError exception
+ super(namesymbol, *params)
+ end
+ end
+
+ def new
+ kid = Person.new
+ kid.parent = self
+ kid
+ end
+
+end
+
+dad = Person.new
+dad.name = "Jason"
+dad.age = 23
+kid = dad.new
+kid.name = "Rachel"
+kid.age = 2
+puts "Kid's parent is #{kid.parent.name}"
+puts dad
+puts kid
+
+class Employee < Person
+ def initialize
+ super
+ @ok_fields.push("salary", "boss")
+ end
+ def ok_fields
+ @ok_fields
+ end
+end
+
+
+# @@PLEAC@@_13.13
+# The ruby garbage collector pretends to cope with circular structures.
+# You can test it with this code:
+class RingNode
+ attr_accessor :next
+ attr_accessor :prev
+ attr_reader :name
+
+ def initialize(aName)
+ @name = aName
+ ObjectSpace.define_finalizer(self,
+ self.class.method(:finalize).to_proc)
+ end
+
+ def RingNode.finalize(id)
+ puts "Node #{id} dying"
+ end
+
+ def RingNode.show_all_objects
+ ObjectSpace.each_object {|id|
+ puts id.name if id.class == RingNode
+ }
+ end
+end
+
+def create_test
+ a = RingNode.new("Node A")
+ b = RingNode.new("Node B")
+ c = RingNode.new("Node C")
+ a.next = b
+ b.next = c
+ c.next = a
+ a.prev = c
+ c.prev = b
+ b.prev = a
+
+ a = nil
+ b = nil
+ c = nil
+end
+
+create_test
+RingNode.show_all_objects
+ObjectSpace.garbage_collect
+puts "After garbage collection"
+RingNode.show_all_objects
+
+
+# @@PLEAC@@_13.14
+class String
+ def <=>(other)
+ self.casecmp other
+ end
+end
+
+# There is no way to directly overload the '""' (stringify)
+# operator in Ruby. However, by convention, classes which
+# can reasonably be converted to a String will define a
+# 'to_s' method as in the TimeNumber class defined below.
+# The 'puts' method will automatcally call an object's
+# 'to_s' method as is demonstrated below.
+# Furthermore, if a class defines a to_str method, an object of that
+# class can be used most any place where the interpreter is looking
+# for a String value.
+
+#---------------------------------------
+# NOTE: Ruby has a builtin Time class which would usually be used
+# to manipulate time objects, the following is supplied for
+# educational purposes to demonstrate operator overloading.
+#
+class TimeNumber
+ attr_accessor :hours,:minutes,:seconds
+ def initialize( hours, minutes, seconds)
+ @hours = hours
+ @minutes = minutes
+ @seconds = seconds
+ end
+
+ def to_s
+ return sprintf( "%d:%02d:%02d", @hours, @minutes, @seconds)
+ end
+
+ def to_str
+ to_s
+ end
+
+ def +( other)
+ seconds = @seconds + other.seconds
+ minutes = @minutes + other.minutes
+ hours = @hours + other.hours
+ if seconds >= 60
+ seconds %= 60
+ minutes += 1
+ end
+ if minutes >= 60
+ minutes %= 60
+ hours += 1
+ end
+ return TimeNumber.new(hours, minutes, seconds)
+ end
+
+ def -(other)
+ raise NotImplementedError
+ end
+
+ def *(other)
+ raise NotImplementedError
+ end
+
+ def /( other)
+ raise NotImplementedError
+ end
+end
+
+t1 = TimeNumber.new(0, 58, 59)
+sec = TimeNumber.new(0, 0, 1)
+min = TimeNumber.new(0, 1, 0)
+puts t1 + sec + min + min
+
+#-----------------------------
+# StrNum class example: Ruby's builtin String class already has the
+# capabilities outlined in StrNum Perl example, however the '*' operator
+# on Ruby's String class acts differently: It creates a string which
+# is the original string repeated N times.
+#
+# Using Ruby's String class as is in this example:
+x = "Red"; y = "Black"
+z = x+y
+r = z*3 # r is "RedBlackRedBlackRedBlack"
+puts "values are #{x}, #{y}, #{z}, and #{r}"
+print "#{x} is ", x < y ? "LT" : "GE", " #{y}\n"
+# prints:
+# values are Red, Black, RedBlack, and RedBlackRedBlackRedBlack
+# Red is GE Black
+
+#-----------------------------
+class FixNum
+ REGEX = /(\.\d*)/
+ DEFAULT_PLACES = 0
+ attr_accessor :value, :places
+ def initialize(value, places = nil)
+ @value = value
+ if places
+ @places = places
+ else
+ m = REGEX.match(value.to_s)
+ if m
+ @places = m[0].length - 1
+ else
+ @places = DEFAULT_PLACES
+ end
+ end
+ end
+
+ def +(other)
+ FixNum.new(@value + other.value, max(@places, other.places))
+ end
+
+ def *(other)
+ FixNum.new(@value * other.value, max(@places, other.places))
+ end
+
+ def /(other)
+ puts "Divide: #{@value.to_f/other.value.to_f}"
+ result = FixNum.new(@value.to_f/other.value.to_f)
+ result.places = max(result.places,other.places)
+ result
+ end
+
+ def to_s
+ sprintf("STR%s: %.*f", self.class.to_s , @places, @value) #.
+ end
+
+ def to_str
+ to_s
+ end
+
+ def to_i #convert to int
+ @value.to_i
+ end
+
+ def to_f #convert to float`
+ @value.to_f
+ end
+
+ private
+ def max(a,b)
+ a > b ? a : b
+ end
+end
+
+def demo()
+ x = FixNum.new(40)
+ y = FixNum.new(12, 0)
+
+ puts "sum of #{x} and #{y} is #{x+y}"
+ puts "product of #{x} and #{y} is #{x*y}"
+
+ z = x/y
+ puts "#{z} has #{z.places} places"
+ unless z.places
+ z.places = 2
+ end
+
+ puts "div of #{x} by #{y} is #{z}"
+ puts "square of that is #{z*z}"
+end
+
+if __FILE__ == $0
+ demo()
+end
+
+
+# @@PLEAC@@_14.1
+# There are dbm, sdbm, gdbm modules
+# and the bdb module for accessing the berkeley db
+# sdbm seem to be available on the most systems,
+# so we use it here
+#
+require "sdbm"
+SDBM.open("filename", 0666) { |dbobj|
+ # raises exception if open error
+
+ # the returned sdbm-dbobj has most of the methods of a hash
+ v = dbobj["key"]
+ dbobj["key"] = "newvalue"
+ if dbobj.has_key?("key")
+ # ...
+ end
+ dbobj.delete("key2")
+}
+# database is open only inside the block.
+
+# It is also possible to use a open .. close pair:
+dbobj = SDBM.open("filename", 0666)
+#.. do something with dbobj
+dbobj.close
+
+#!/usr/bin/ruby -w
+# userstats - generate statistics on who is logged in
+# call with usernames as argument to display the totals
+# for the given usernames, call with "ALL" to display all users
+
+require "sdbm"
+filename = '/tmp/userstats.db'
+SDBM.open(filename, 0666) { |dbobj|
+ if ARGV.length > 0
+ if ARGV[0] == "ALL"
+ # ARGV is constant, so we need the variable userlist
+ userlist = dbobj.keys().sort()
+ else
+ userlist = ARGV
+ end
+ userlist.each { |user|
+ print "#{user}\t#{dbobj[user]}\n"
+ }
+ else
+ who = `who`
+ who.split("\n").each { |line|
+ md = /^(\S+)/.match(line)
+ raise "Bad line from who: #{line}" unless md
+ # sdbm stores only strings, so "+=" doesn't work,
+ # we need to convert them expicitly back to integer.
+ if dbobj.has_key?(md[0])
+ dbobj[md[0]] = dbobj[md[0]].to_i + 1
+ else
+ dbobj[md[0]] = "1"
+ end
+ }
+ end
+}
+
+
+# @@PLEAC@@_14.2
+# using open and clear
+dbobj = SDBM.open("filename", 0666)
+dbobj.clear()
+dbobj.close()
+# deleting file and recreating it
+# the filenames depend on the flavor of dbm you use,
+# for example sdbm has two files named filename.pag and filename.dir,
+# so you need to delete both files
+begin
+  # sdbm keeps its data in filename.pag and filename.dir; there is no
+  # file called just "filename" to delete
+  File.delete("filename.pag", "filename.dir")
+  # raises an exception if a file does not exist
+  dbobj = SDBM.open("filename", 0666)
+rescue
+  # add error handling here
+end
+
+
+# @@PLEAC@@_14.3
+# sdbm2gdbm: converts sdbm database to a gdbm database
+require "sdbm"
+require "gdbm"
+
+unless ARGV.length == 2
+ fail "usage: sdbm2gdbm infile outfile"
+end
+infile = ARGV[0]
+outfile = ARGV[1]
+
+sdb = SDBM.open(infile)
+gdb = GDBM.open(outfile, 0666)
+sdb.each { |key, val|
+ gdb[key] = val
+}
+gdb.close
+sdb.close
+
+
+# @@PLEAC@@_14.4
+#!/usr/bin/ruby -w
+# dbmmerge: merges two dbm databases
+require "sdbm"
+
+unless ARGV.length == 3
+ fail "usage: dbmmerge indb1 indb2 outdb"
+end
+infile1 = ARGV[0]
+infile2 = ARGV[0]
+outfile = ARGV[2]
+
+in1 = SDBM.open(infile1, nil)
+in2 = SDBM.open(infile2, nil)
+outdb = SDBM.open(outfile, 0666)
+
+[in1, in2].each { |indb|
+ indb.each { |key, val|
+ if outdb.has_key?(key)
+ # decide which value to set.
+ # set outdb[key] if necessary
+ else
+ outdb[key] = val
+ end
+ }
+}
+in1.close
+in2.close
+outdb.close
+
+
+# @@PLEAC@@_14.7
+# we write a tie method that extends the Array class.
+# It reads the file into the memory, executes the code block
+# in which you can manipulate the array as needed, and writes
+# the array back to the file after the end of the block execution
+class Array
+ def tie(filename, flags)
+ File.open(filename, flags) { |f|
+ f.each_line { |line|
+ self.push(line.chomp)
+ }
+ yield
+ f.rewind
+ each { |line|
+ if line
+ f.puts(line)
+ else
+ f.puts ""
+ end
+ }
+ }
+ end
+end
+
+array = Array.new
+array.tie("/tmp/textfile.txt", File::RDWR|File::CREAT) {
+ array[4] = "a new line 4"
+}
+
+# The tied array can be manipulated like a normal array,
+# so there is no need for a special API, and the recno_demo program
+# to demonstrate is API is useless
+
+
+# tied array demo: show how to use array with a tied file
+filename = "db_file.txt"
+lines = Array.new
+# File.exists? was removed in Ruby 3.2; File.exist? works on every version
+File.unlink(filename) if File.exist?(filename)
+lines.tie(filename, File::RDWR | File::CREAT) {
+ # first create a textfile to play with
+ lines[0] = "zero"
+ lines[1] = "one"
+ lines[2] = "two"
+ lines[3] = "three"
+ lines[4] = "four"
+
+ # print the records in order.
+ # Opposed to perl, the tied array behaves exactly as a normal array
+ puts "\nOriginal"
+ for i in 0..(lines.length-1)
+ puts "#{i}: #{lines[i]}"
+ end
+
+ #use push and pop
+ a = lines.pop
+ lines.push("last")
+ puts("The last line was [#{a}]")
+
+ #use shift and unshift
+ a = lines.shift
+ lines.unshift("first")
+ puts("The first line was [#{a}]")
+
+ # add record after record 2
+ i = 2
+ lines.insert(i + 1, "Newbie")
+
+ # add record before record one
+ i = 1
+ lines.insert(i, "New One")
+
+ # delete record 3
+ lines.delete_at(3)
+
+ #now print the records in reverse order
+ puts "\nReverse"
+ (lines.length - 1).downto(0){ |i|
+ puts "#{i}: #{lines[i]}"
+ }
+
+}
+
+
+# @@PLEAC@@_14.8
+# example to store complex data in a database
+# uses marshall from the standard library
+require "sdbm"
+db = SDBM.open("pleac14-8-database", 0666)
+
+# convert the Objects into strings and back by using the Marshal module.
+# Most normal objects can be converted out of the box,
+# but not special things like procedure objects,
+# IO instance variables, singleton objects
+
+db["Tom Christiansen"] = Marshal.dump(["book author", "tchrist@perl.com"])
+db["Tom Boutell"] = Marshal.dump(["shareware author",
+"boutell@boutell.com"])
+
+name1 = "Tom Christiansen"
+name2 = "Tom Boutell"
+
+tom1 = Marshal.load(db[name1])
+tom2 = Marshal.load(db[name2])
+
+puts "Two Toming: #{tom1} #{tom2}"
+
+if tom1[0] == tom2[0] && tom1[1] == tom2[1]
+ puts "You're having runtime fun with one Tom made two."
+else
+ puts "No two Toms are ever alike"
+end
+
+# To change parts of an entry, get the whole entry, change the parts,
+# and save the whole entry back
+entry = Marshal.load(db["Tom Boutell"])
+entry[0] = "Poet Programmer"
+db["Tom Boutell"] = Marshal.dump(entry)
+db.close
+
+
+# @@PLEAC@@_14.9
+# example to make data persistent
+# uses Marshal from the standard lib
+# Stores the data in a simple file,
+# see 14.8 on how to store it in a dbm file
+
+# The BEGIN block is executed before the rest of the script
+# we use global variables here because local variables
+# will go out of scope and are not accessible from the main script
+
+BEGIN {
+ $persistent_store = "persitence.dat"
+ begin
+ File.open($persistent_store) do |f|
+ $stringvariable1 = Marshal.load(f)
+ $arrayvariable2 = Marshal.load(f)
+ end
+ rescue
+ puts "Can not open #{$persistent_store}"
+ # Initialisation if this script runs the first time
+ $stringvariable1 = ""
+ $arrayvariable2 = []
+ end
+}
+
+END {
+ File.open($persistent_store, "w+") do |f|
+ Marshal.dump($stringvariable1, f)
+ Marshal.dump($arrayvariable2, f)
+ end
+}
+
+# simple test program
+puts $stringvariable1
+puts $arrayvariable2
+$stringvariable1 = "Hello World"
+$arrayvariable2.push(5)
+puts $stringvariable1
+puts $arrayvariable2
+
+
+# @@PLEAC@@_14.10
+#!/usr/bin/ruby -w
+# Ruby has a dbi module with an architecture similar
+# to the Perl dbi module: the dbi module provides an unified
+# interface and uses specialized drivers for each dbms vendor
+#
+begin
+ DBI.connect("DBI:driver:driverspecific", "username", "auth") {
+ |dbh|
+
+ dbh.do(SQL1)
+
+ dbh.prepare(SQL2){ |sth|
+ sth.execute
+ sth.fetch {|row|
+ # ...
+ }
+ } # end of block finishes the statement handle
+ } # end of block closes the database connection
+rescue DBI::DatabaseError => e
+ puts "dbi error occurred"
+ puts "Error code: #{e.err}"
+ puts "Error message: #{e.errstr}"
+end
+
+#!/usr/bin/ruby -w
+# dbusers - example for mysql which creates a table,
+# fills it with values, retrieves the values back,
+# and finally destroys the table.
+
+require "dbi"
+
+# replacement for Perl's User::pwent module
+def getpwent
+ result = []
+ File.open("/etc/passwd") {|file|
+ file.each_line {|line|
+ next if line.match(/^#/)
+ cols = line.split(":")
+ result.push([cols[2], cols[0]])
+ }
+ }
+ result
+end
+
+begin
+ DBI.connect("DBI:Mysql:pleacdatabase", "pleac", "pleacpassword") {
+ |conn|
+
+ conn.do("CREATE TABLE users (uid INT, login CHAR(8))")
+
+ users = getpwent
+
+ conn.prepare("INSERT INTO users VALUES (?,?)") {|sth|
+ users.each {|entry|
+ sth.execute(entry[0], entry[1])
+ }
+ }
+
+ conn.execute("SELECT uid, login FROM users WHERE uid < 50") {|sth|
+ sth.fetch {|row|
+ puts row.collect {|col|
+ if col.nil?
+ "(null)"
+ else
+ col
+ end
+ }.join(", ")
+ }
+ }
+
+ conn.do("DROP TABLE users")
+ }
+rescue DBI::DatabaseError => e
+ puts "dbi error occurred"
+ puts "Error code: #{e.err}"
+ puts "Error message: #{e.errstr}"
+end
+
+
+# @@PLEAC@@_15.1
+# This test program demonstrates parsing program arguments.
+# It uses the optparse library, which is included with ruby 1.8
+# It handles classic unix style and gnu style options
+require 'optparse'
+
+@debugmode = false
+@verbose = false
+
+ARGV.options do |opts|
+ opts.banner = "Usage: ruby #{$0} [OPTIONS] INPUTFILES"
+
+ opts.on("-h", "--help", "show this message") {
+ puts opts
+ exit
+ }
+ # The OptionParser#on method is called with a specification of short
+ # options, of long options, a data type spezification and user help
+ # messages for this option.
+ # The method analyses the given parameter and decides what it is,
+ # so you can leave out the long option if you don't need it
+ opts.on("-v", "--[no-]verbose=[FLAG]", TrueClass, "run verbosly") {
+ |@verbose| # sets @verbose to true or false
+ }
+ opts.on("-D", "--DEBUG", TrueClass, "turns on debug mode" ){
+ |@debugmode| # sets @debugmode to true
+ }
+ opts.on("-c", "--count=NUMBER", Integer, "how many times we do it" ){
+ |@count| # sets @count to given integer
+ }
+ opts.on("-o", "--output=FILE", String, "file to write output to"){
+ |@outputfile| # sets @outputfile to given string
+ }
+ opts.parse!
+end
+
+# example to use the options in the main program
+puts "Verbose is on" if @verbose
+puts "Debugmode is on" if @debugmode
+puts "Outfile is #{@outputfile}" if defined? @outputfile
+puts "Count is #{@count}" if defined? @count
+ARGV.each { |param|
+ puts "Got parameter #{param}"
+}
+
+
+# @@PLEAC@@_15.4
+buf = "\0" * 8
+$stdout.ioctl(0x5413, buf)
+ws_row, ws_col, ws_xpixel, ws_ypixel = buf.unpack("S4")
+
+raise "You must have at least 20 characters" unless ws_col >= 20
+max = 0
+values = (1..5).collect { rand(20) } # generate an array[5] of rand values
+for i in values
+ max = i if max < i
+end
+ratio = Float(ws_col-12)/max # chars per unit
+for i in values
+ printf "%8.1f %s\n", i, "*" * (ratio*i)
+end
+
+# gives, for example:
+# 15.0 *******************************
+# 10.0 *********************
+# 5.0 **********
+# 14.0 *****************************
+# 18.0 **************************************
+
+
+# @@PLEAC@@_16.1
+output = `program args` # collect output into one multiline string
+output = `program args`.split # collect output into array, one line per
+element
+
+readme = IO.popen("ls")
+output = ""
+while readme.gets do
+ output += $_
+end
+readme.close
+
+`fsck -y /dev/rsd1a` # BAD AND SCARY in Perl because it's managed by the shell
+ # I donna in Ruby ...
+
+# so the "clean and secure" version
+readme, writeme = IO.pipe
+pid = fork {
+ # child
+ $stdout = writeme
+ readme.close
+ exec('find', '..')
+}
+# parent
+Process.waitpid(pid, 0)
+writeme.close
+while readme.gets do
+ # do something with $_
+end
+
+
+# @@PLEAC@@_16.2
+status = system("xemacs #{myfile}")
+
+status = system("xemacs", myfile)
+
+system("cmd1 args | cmd2 | cmd3 >outfile")
+system("cmd args <infile >outfile 2>errfile")
+
+# stop if the command fails
+raise "$program exited funny: #{$?}" unless system("cmd", "args1", "args2")
+
+# get the value of the signal sent to the child
+# even if it is a SIGINT or SIGQUIT
+system(arglist)
+raise "program killed by signal #{$?}" if ($? & 127) != 0
+
+pid = fork {
+ trap("SIGINT", "IGNORE")
+ exec("sleep", "10")
+}
+trap ("SIGINT") {
+ puts "Tsk tsk, no process interruptus"
+}
+Process.waitpid(pid, 0)
+
+# Ruby doesn't let you lie to a program started with 'system' about its
+# own name (i.e. choose what it sees as argv[0] in C, $0 in Perl/Ruby ...).
+# A (dirty) workaround is to create a link (under Unix), run this link and
+# erase it afterwards. Does somebody have a better idea?
+
+
+# @@PLEAC@@_16.3
+exec("archive *.data")
+
+exec("archive", "accounting.data")
+
+exec("archive accounting.data")
+
+
+# @@PLEAC@@_16.4
+# read the output of a program
+IO.popen("ls") {|readme|
+ while readme.gets do
+ # ...
+ end
+}
+# or
+readme = IO.popen("ls")
+while readme.gets do
+ # ...
+end
+readme.close
+
+# "write" in a program
+IO.popen("cmd args","w") {|pipe|
+ pipe.puts("data")
+ pipe.puts("foo")
+}
+
+# close wait for the end of the process
+read = IO.popen("sleep 10000") # child goes to sleep
+read.close # and the parent goes to lala land
+
+writeme = IO.popen("cmd args", "w")
+writeme.puts "hello" # program will get hello\n on STDIN
+writeme.close # program will get EOF on STDIN
+
+# send in a pager (eg less) all output
+$stdout = IO.popen("/usr/bin/less","w")
+print "huge string\n" * 10000
+
+
+# @@PLEAC@@_16.5
+#-----------------------------
+def head(lines = 20)
+ pid = open("|-","w")
+ if pid == nil
+ return
+ else
+ while gets() do
+ pid.print
+ lines -= 1
+ break if lines == 0
+ end
+ end
+ exit
+end
+
+head(100)
+while gets() do
+ print
+end
+#-----------------------------
+# 1: > Welcome to Linux, version 2.0.33 on a i686
+#
+# 2: >
+#
+# 3: > "The software required `Windows 95 or better',
+#
+# 4: > so I installed Linux."
+#-----------------------------
+# > 1: Welcome to Linux, Kernel version 2.0.33 on a i686
+#
+# > 2:
+#
+# > 3: "The software required `Windows 95 or better',
+#
+# > 4: so I installed Linux."
+#-----------------------------
+#!/usr/bin/ruby
+# qnumcat - demo additive output filters
+
+def number()
+ pid = open("|-","w")
+ if pid == nil
+ return
+ else
+ while gets() do pid.printf("%d: %s", $., $_); end
+ end
+ exit
+end
+
+def quote()
+ pid = open("|-","w")
+ if pid == nil
+ return
+ else
+ while gets() do pid.print "> #{$_}" end
+ end
+ exit
+end
+
+number()
+quote()
+
+while gets() do
+ print
+end
+$stdout.close
+exit
+
+
+# @@PLEAC@@_16.6
+ARGV.map! { |arg|
+ arg =~ /\.(gz|Z)$/ ? "|gzip -dc #{arg}" : arg
+}
+for file in ARGV
+ fh = open(file)
+ while fh.gets() do
+ # .......
+ end
+end
+#-----------------------------
+ARGV.map! { |arg|
+ arg =~ %r#^\w+://# ? "|GET #{arg}" : arg #
+}
+for file in ARGV
+ fh = open(file)
+ while fh.gets() do
+ # .......
+ end
+end
+#-----------------------------
+pwdinfo = (`domainname` =~ /^(\(none\))?$/) ? '/etc/passwd' : '|ypcat passwd';
+pwd = open(pwdinfo);
+#-----------------------------
+puts "File, please? ";
+file = gets().chomp();
+fh = open(file);
+
+
+# @@PLEAC@@_16.7
+output = `cmd 2>&1` # with backticks
+# or
+ph = open("|cmd 2>&1") # with an open pipe
+while ph.gets() { } # plus a read
+#-----------------------------
+output = `cmd 2>/dev/null` # with backticks
+# or
+ph = open("|cmd 2>/dev/null") # with an open pipe
+while ph.gets() { } # plus a read
+#-----------------------------
+output = `cmd 2>&1 1>/dev/null` # with backticks
+# or
+ph = open("|cmd 2>&1 1>/dev/null") # with an open pipe
+while ph.gets() { } # plus a read
+#-----------------------------
+output = `cmd 3>&1 1>&2 2>&3 3>&-` # with backticks
+# or
+ph = open("|cmd 3>&1 1>&2 2>&3 3>&-") # with an open pipe
+while ph.gets() { } # plus a read
+#-----------------------------
+system("program args 1>/tmp/program.stdout 2>/tmp/program.stderr")
+#-----------------------------
+output = `cmd 3>&1 1>&2 2>&3 3>&-`
+#-----------------------------
+fd3 = fd1
+fd1 = fd2
+fd2 = fd3
+fd3 = undef
+#-----------------------------
+system("prog args 1>tmpfile 2>&1")
+system("prog args 2>&1 1>tmpfile")
+#-----------------------------
+# system ("prog args 1>tmpfile 2>&1")
+fd1 = "tmpfile" # change stdout destination first
+fd2 = fd1 # now point stderr there, too
+#-----------------------------
+# system("prog args 2>&1 1>tmpfile")
+fd2 = fd1 # stderr same destination as stdout
+fd1 = "tmpfile" # but change stdout destination
+#-----------------------------
+# It is often better not to rely on the shell,
+# because of portability, possible security problems
+# and bigger resource usage. So, it is often better to use the open3 library.
+# See below for an example.
+# opening stdin, stdout, stderr
+require "open3"
+stdin, stdout, stderr = Open3.popen('cmd')
+
+
+# @@PLEAC@@_16.8
+#-----------------------------
+# Contrary to perl, we don't need to use a module in Ruby
+fh = Kernel.open("|" + program, "w+")
+fh.puts "here's your input\n"
+output = fh.gets()
+fh.close()
+#-----------------------------
+Kernel.open("|program"),"w+") # RIGHT !
+#-----------------------------
+# Ruby has already object methods for I/O handles
+#-----------------------------
+begin
+ fh = Kernel.open("|" + program_and_options, "w+")
+rescue
+ if ($@ ~= /^open/)
+ $stderr.puts "open failed : #{$!} \n #{$@} \n"
+ break
+ end
+ raise # reraise unforseen exception
+end
+
+
+# @@PLEAC@@_16.13
+#% kill -l
+#HUP INT QUIT ILL TRAP ABRT BUS FPE KILL USR1 SEGV USR2 PIPE
+#ALRM TERM CHLD CONT STOP TSTP TTIN TTOU URG XCPU XFSZ VTALRM
+#PROF WINCH POLL PWR
+#-----------------------------
+#% ruby -e 'puts Signal.list.keys.join(" ")'
+#PWR USR1 BUS USR2 TERM SEGV KILL POLL STOP SYS TRAP IOT HUP INT #
+#WINCH XCPU TTIN CLD TSTP FPE IO TTOU PROF CHLD CONT PIPE ABRT
+#VTALRM QUIT ILL XFSZ URG ALRM
+#-----------------------------
+# After that, the Perl script creates a hash equivalent to Signal.list,
+# and an array. The array can be obtained with:
+signame = []
+Signal.list.each { |name, i| signame[i] = name }
+
+
+# @@PLEAC@@_16.14
+Process.kill(9, pid) # send $pid a signal 9
+Process.kill(-1, Process.getpgrp()) # send whole job a signal 1
+Process.kill("USR1", $$) # send myself a SIGUSR1
+Process.kill("HUP", pid1, pid2, pid3) # send a SIGHUP to processes in @pids
+#-----------------------------
+begin
+ Process.kill(0, minion)
+ puts "#{minion} is alive!"
+rescue Errno::EPERM # changed uid
+ puts "#{minion} has escaped my control!";
+rescue Errno::ESRCH
+ puts "#{minion} is deceased."; # or zombied
+rescue
+ puts "Odd; I couldn't check the status of #{minion} : #{$!}"
+end
+
+
+# @@PLEAC@@_16.15
+# a Proc object as handler
+Kernel.trap("QUIT", got_sig_quit) # got_sig_quit = Proc.new { puts "Quit\n" }
+# a method as handler: call it from a block (the string used here before
+# named got_sig_quit although the method meant is got_sig_pipe)
+trap("PIPE") { got_sig_pipe }     # def got_sig_pipe ...
+# Ruby has no ++ operator
+trap("INT") { ouch += 1 }         # increment ouch for every SIGINT
+#-----------------------------
+trap("INT", "IGNORE")             # ignore the signal INT
+#-----------------------------
+trap("STOP", "DEFAULT")           # restore default STOP signal handling
+
+
+# @@PLEAC@@_16.16
+# the signal handler
+def ding
+ trap("INT", "ding")
+ puts "\aEnter your name!"
+end
+
+# prompt for name, overriding SIGINT
+def get_name
+ save = trap("INT", "ding")
+
+ puts "Kindly Stranger, please enter your name: "
+ name = gets().chomp()
+ trap("INT", save)
+ name
+end
+
+
+# @@PLEAC@@_16.21
+# implemented thanks to http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-talk/1760
+require 'timeout'
+
+# we'll do something vastly more useful than cookbook to demonstrate timeouts
+begin
+  # Timeout.timeout is the module-level entry point; the bare Kernel#timeout
+  # shortcut no longer exists in current Ruby.
+  Timeout.timeout(5) {
+    waitsec = rand(10)
+    puts "Let's see if a sleep of #{waitsec} seconds is longer than 5 seconds..."
+    system("sleep #{waitsec}")
+  }
+  puts "Timeout didn't occur"
+rescue Timeout::Error
+  puts "Timed out!"
+end
+
+
+# @@PLEAC@@_17.1
+# A basic TCP client connection
+require 'socket'
+begin
+ t = TCPSocket.new('www.ruby-lang.org', 'www')
+rescue
+ puts "error: #{$!}"
+else
+ # ... do something with the socket
+ t.print "GET / HTTP/1.0\n\n"
+ answer = t.gets(nil)
+ # and terminate the connection when we're done
+ t.close
+end
+
+# Using the evil low level socket API
+require 'socket'
+# create a socket
+s = Socket.new(Socket::AF_INET, Socket::SOCK_STREAM, 0)
+# build the address of the remote machine; the library packs the
+# platform's sockaddr layout, which a hand-written pack string cannot
+# do portably
+sockaddr_server = Socket.pack_sockaddr_in(80, 'www.ruby-lang.org')
+# connect
+begin
+  s.connect(sockaddr_server)
+rescue
+  puts "error: #{$!}"
+else
+  # ... do something with the socket
+  s.print "GET / HTTP/1.0\n\n"
+ensure
+  # terminate the connection when we're done, and release the socket
+  # when the connect failed as well
+  s.close
+end
+
+# TCP connection with management of error (DNS)
+require 'socket'
+begin
+ client = TCPSocket.new('does not exists', 'www')
+rescue
+ puts "error: #{$!}"
+end
+
+# TCP connection with a time out
+require 'socket'
+require 'timeout'
+begin
+  # Timeout.timeout replaces the bare timeout call, which is no longer
+  # available as a Kernel method.
+  Timeout.timeout(1) do # the server has one second to answer
+    client = TCPSocket.new('www.host.com', 'www')
+  end
+rescue
+  puts "error: #{$!}"
+end
+
+
+# @@PLEAC@@_17.12
+require 'socket'
+
+class Preforker
+ attr_reader (:child_count)
+
+ def initialize(prefork, max_clients_per_child, port, client_handler)
+ @prefork = prefork
+ @max_clients_per_child = max_clients_per_child
+ @port = port
+ @child_count = 0
+
+ @reaper = proc {
+ trap('CHLD', @reaper)
+ pid = Process.wait
+ @child_count -= 1
+ }
+
+ @huntsman = proc {
+ trap('CHLD', 'IGNORE')
+ trap('INT', 'IGNORE')
+ Process.kill('INT', 0)
+ exit
+ }
+
+ @client_handler=client_handler
+ end
+
+ def child_handler
+ trap('INT', 'EXIT')
+ @client_handler.setUp
+ # wish: sigprocmask UNblock SIGINT
+ @max_clients_per_child.times {
+ client = @server.accept or break
+ @client_handler.handle_request(client)
+ client.close
+ }
+ @client_handler.tearDown
+ end
+
+ def make_new_child
+ # wish: sigprocmask block SIGINT
+ @child_count += 1
+ pid = fork do
+ child_handler
+ end
+ # wish: sigprocmask UNblock SIGINT
+ end
+
+ def run
+ @server = TCPserver.open(@port)
+ trap('CHLD', @reaper)
+ trap('INT', @huntsman)
+ loop {
+ (@prefork - @child_count).times { |i|
+ make_new_child
+ }
+ sleep .1
+ }
+ end
+end
+
+#-----------------------------
+#!/usr/bin/ruby
+
+require 'Preforker'
+
+class ClientHandler
+ def setUp
+ end
+
+ def tearDown
+ end
+
+ def handle_request(client)
+ # do stuff
+ end
+end
+
+server = Preforker.new(1, 100, 3102, ClientHandler.new)
+server.run
+
+
+# @@PLEAC@@_18.2
+require 'net/ftp'
+
+begin
+ ftp = Net::FTP::new("ftp.host.com")
+ ftp.login(username,password)
+ ftp.chdir(directory)
+ ftp.get(filename)
+ ftp.put(filename)
+rescue Net::FTPError
+ $stderr.print "FTP failed: " + $!
+ensure
+ ftp.close() if ftp
+end
+
+# A better solution for a local use could be :
+Net::FTP::new("ftp.host.com") do |ftp|
+ ftp.login(username,password)
+ ftp.chdir(directory)
+ ftp.get(filename)
+ ftp.put(filename)
+end
+
+# If you have only one file to get, there is a simple solution :
+require 'open-uri'
+open("ftp://www.ruby-lang.org/path/filename") do |fh|
+ # read from filehandle fh
+end
+#--------------------------------------------
+# to wait a defined time for the connection,
+# use the timeout module
+require 'timeout'
+begin
+  # Timeout.timeout replaces the bare timeout call, which is no longer
+  # available as a Kernel method.
+  Timeout.timeout(30) {
+    ftp = Net::FTP::new("ftp.host.com")
+    ftp.debug_mode = true
+  }
+rescue Net::FTPError
+  $stderr.puts "Couldn't connect."
+rescue Timeout::Error
+  $stderr.puts "Timeout while connecting to server."
+end
+
+# Four login attempts with an increasing number of credentials; each one
+# reports a Net::FTPError on stderr and carries on.
+# no arguments
+begin
+ ftp.login()
+rescue Net::FTPError
+ $stderr.print "Couldn't authentificate.\n"
+end
+
+# user name only
+begin
+ ftp.login(username)
+rescue Net::FTPError
+ $stderr.print "Still couldn't authenticate.\n"
+end
+
+# user name and password
+begin
+ ftp.login(username, password)
+rescue Net::FTPError
+ $stderr.print "Couldn't authenticate, even with explicit
+ username and password.\n"
+end
+
+# user name, password and account
+begin
+ ftp.login(username, password, account)
+rescue Net::FTPError
+ $stderr.print "No dice. It hates me.\n"
+end
+#-----------------------------
+# upload localfile under the name remotefile
+ftp.put(localfile, remotefile)
+#-----------------------------
+# Sending data from STDIN is not directly supported
+# by the ftp library module. A possible way to do it is to use the
+# storlines method directly to send raw commands to the ftp server.
+#-----------------------------
+# download remotefile into localfile
+ftp.get(remotefile, localfile)
+#-----------------------------
+# the block is handed the retrieved data and prints it
+ftp.get(remotefile) { |data| puts data }
+#-----------------------------
+ftp.chdir("/pub/ruby")
+print "I'm in the directory ", ftp.pwd(), "\n"
+#-----------------------------
+ftp.mkdir("/pub/ruby/new_dir")
+#-----------------------------
+# long listing, one string per entry
+lines = ftp.ls("/pub/ruby/")
+# => ["drwxr-xr-x 2 matz users 4096 July 17 1998 1.0", ... ]
+
+# last entry of the sorted listing of the matching files
+latest = ftp.dir("/pub/ruby/*.tgz").sort.last
+
+# names only
+ftp.nlst("/pub/ruby")
+# => ["/pub/ruby/1.0", ... ]
+#-----------------------------
+ftp.quit()
+
+
+# @@PLEAC@@_18.6
+require 'net/telnet'
+t = Net::Telnet::new( "Timeout" => 10,
+ "Prompt" => /%/,
+ "Host" => host )
+t.login(username, password)
+files = t.cmd("ls")
+t.print("top")
+process_string = t.waitfor(/\d+ processes/)
+t.close
+#-----------------------------
+/[$%#>] \z/n
+#-----------------------------
+# In case of an error, the telnet module throws an exception.
+# For control of the behavior in case of an error,
+# you just need to catch the exceptions and do your custom
+# error handling.
+#-----------------------------
+begin
+ telnet.login(username, password)
+rescue TimeoutError
+ fail "Login failed !\n"
+end
+#-----------------------------
+telnet.waitfor('/--more--/')
+#-----------------------------
+telnet.waitfor(String => 'greasy smoke', Timeout => 30)
+
+
+# @@PLEAC@@_18.7
+require 'ping'
+
+puts "#{host} is alive.\n" if Ping.pingecho(host);
+#-----------------------------
+# the ping module only use TCP ping, not ICMP even if we are root
+if Ping.pingecho("kingkong.com")
+ puts "The giant ape lives!\n";
+else
+ puts "All hail mighty Gamera, friend of children!\n";
+end
+
+
+# @@PLEAC@@_19.1
+#!/usr/local/bin/ruby -w
+# hiweb - load CGI class to decode information given by web server
+
+require 'cgi'
+
+cgi = CGI.new('html3')
+
+# get a parameter from a form
+value = cgi.params['PARAM_NAME'][0]
+
+# output a document
+cgi.out {
+ cgi.html {
+ cgi.head { cgi.title { "Howdy there!" } } +
+ cgi.body { cgi.p { "You typed: " + cgi.tt {
+ CGI.escapeHTML(value) } } }
+ }
+}
+
+require 'cgi'
+cgi = CGI.new
+who = cgi.param["Name"][0] # first param in list
+phone = cgi.param["Number"][0]
+picks = cgi.param["Choices"] # complete list
+
+print cgi.header( 'type' => 'text/plain',
+ 'expires' => Time.now + (3 * 24 * 60 * 60) )
+
+
+# @@PLEAC@@_19.3
+#!/usr/local/bin/ruby -w
+# webwhoami - show web user's id
+require 'etc'
+print "Content-Type: text/plain\n\n"
+print "Running as " + Etc.getpwuid.name + "\n"
+
+# % ruby -wc cgi-script # just check syntax
+
+# % ruby -w cgi-script # params from stdin
+# (offline mode: enter name=value pairs on standard input)
+# name=joe
+# number=10
+# ^D
+
+# % ruby -w cgi-script name=joe number=10 # run with mock form input
+# % ruby -d cgi-script name=joe number=10 # ditto, under the debugger
+
+# POST method script in csh
+# % (setenv HTTP_METHOD POST; ruby -w cgi-script name=joe number=10)
+# POST method script in sh
+# % HTTP_METHOD=POST ruby -w cgi-script name=joe number=10
+
+
+# @@PLEAC@@_19.4
+# ruby has several security levels, the level "1" is similar to perls taint mode.
+# It can be switched on by providing the -T command line parameter
+# or by setting $SAFE to 1. Setting $SAFE to 2,3 or 4 restricts possible
+# harmful operations further.
+
+#!/usr/bin/ruby -T
+# First attempt: use the command line argument as a file name directly.
+$SAFE = 1
+File.open(ARGV[0], "w")
+# ruby warns with:
+# taint1.rb:2:in `initialize': Insecure operation - initialize (SecurityError)
+
+# Second attempt: accept the name only if it consists of word characters,
+# dots and dashes, then continue with the captured text.
+$SAFE = 1
+file = ARGV[0]
+unless /^([\w.-]+)$/.match(file)
+ raise "filename #{file} has invalid characters"
+end
+file = $1
+# In ruby, even the back reference from a regular expression stays tainted.
+# you need to explicitly untaint the variable:
+file.untaint
+File.open(file, "w")
+
+# Race condition exists like in perl:
+unless File.exists(filename) # Wrong because of race condition
+ File.open(filename, "w")
+end
+
+
+
+# @@PLEAC@@_19.10
+preference_value = cgi.cookies["preference name"][0]
+
+# Sample the clock once, so all six fields of the expiry date come from
+# the same instant.
+now = Time.now
+packed_cookie = CGI::Cookie.new("name" => "preference name",
+                                "value" => "whatever you'd like",
+                                "expires" => Time.local(now.year + 2,
+                                  now.mon, now.day, now.hour, now.min, now.sec) )
+
+cgi.header("cookie" => [packed_cookie])
+
+#!/usr/local/bin/ruby -w
+# ic_cookies - sample CGI script that uses a cookie
+require 'cgi'
+
+cgi = CGI.new('html3')
+
+cookname = "favorite ice cream"
+favorite = cgi.params["flavor"][0]
+tasty = cgi.cookies[cookname][0] || 'mint'
+
+unless favorite
+ cgi.out {
+ cgi.html {
+ cgi.head { cgi.title { "Ice Cookies" } } +
+ cgi.body {
+ cgi.h1 { "Hello Ice Cream" } +
+ cgi.hr +
+ cgi.form {
+ cgi.p { "Please select a flavor: " +
+ cgi.text_field("flavor", tasty ) }
+ } +
+ cgi.hr
+ }
+ }
+ }
+else
+ cookie = CGI::Cookie.new( "name" => cookname,
+ "value" => favorite,
+ "expires" => Time.local(Time.now.year + 2,
+Time.now.mon, Time.now.day, Time.now.hour, Time.now.min, Time.now.sec) )
+ cgi.out("cookie" => [cookie]) {
+ cgi.html {
+ cgi.head { cgi.title { "Ice Cookies" } } +
+ cgi.body {
+ cgi.h1 { "Hello Ice Cream" } +
+ cgi.p { "You chose as your favorite flavor `#{favorite}'." }
+ }
+ }
+ }
+end
+
+
+# @@PLEAC@@_20.9
+def templatefile(filename, fillings)
+ aFile = File.new(filename, "r")
+ text = aFile.read()
+ aFile.close()
+ pattern = Regexp.new('%%(.*?)%%')
+ text.gsub!(pattern) {
+ fillings[$1] || ""
+ }
+ text
+end
+
+# placeholder name => replacement; the keys are the names looked up for
+# the %%name%% placeholders in the template
+fields = {
+ 'username' => whats_his_name,
+ 'count' => login_count,
+ 'total' => minutes_used
+}
+# print the filled-in template
+puts templatefile('simple.template', fields)
+
+# @@INCOMPLETE@@
+# An example using databases is missing
+
diff --git a/bench/example.rubyfast b/bench/example.rubyfast new file mode 100644 index 0000000..a3367d1 --- /dev/null +++ b/bench/example.rubyfast @@ -0,0 +1,10428 @@ +#n ist das anzuzeigende Feld, o die Zwischenablage für eine #Iteration p dient dazu zu jedem Feld die Nachbarschaft zu definieren. +n=Array.new +o=Array.new +p=Array.new +x=0 + +#Anlegen des Arrays n +while x<=10000 + n[x]=0 + x +=1 +end + +#Anlegen des Arrays p +x=0 +while x<=10000 + p[x]=3 + x +=1 +end + + +n[2]=1 +n[102]=1 +n[202]=1 + +loop{ +x=0 +#Die nachbarschaften aller Felder werden überprüft +while x<10000 +#a bis f dienen dazu die Nachbarschaft festzulegen. Man stelle sich die #Zahl von 1 bis 64 im Binärcode vor 1 bedeutet an 0 aus + a=p[x]/32<1 ? 0 : 1 + b=(p[x]%32)/16<1 ? 0 : 1 + c=(p[x]%16)/8<1 ? 0 : 1 + d=(p[x]%8)/4<1 ? 0 : 1 + e=(p[x]%4)/2<1 ? 0 : 1 + f=(p[x]%2)<1 ? 0 : 1 +#t= n[x-201].to_i*b+n[x-200].to_i*d+n[x-199].to_i*b+ + #n[x-102].to_i*a+n[x-101].to_i*e+n[x-100].to_i+n[x-99].to_i*f+n[x-98]#to_i*a+ + #n[x-2].to_i*c+n[x-1].to_i+n[x+1].to_i+n[x+2].to_i*c+ + #n[x+98].to_i*a+n[x+99].to_i*f+n[x+100].to_i+n[x+101].#to_i*e+n[x+102].to_i*a+ + #n[x+199].to_i*b+n[x+200].to_i*d+n[x+201].to_i*b + #Die Summe der Felder die zur Nachbarschaft gerechnet werden a bis f +#sind hierbei Multiplikatoren mit dem Wert 1oder 0 + +t=(x-201>=0? n[x-201].to_i : 0)*b+(x-200>=0? n[x-200].to_i : 0)*d+(x-199>=0? n[x-199].to_i : 0)*b+ + (x-102>=0? n[x-102].to_i : 0)*a+(x-101>=0?n[x-101].to_i : 0)*e+n[x-100].to_i+(x-99>=0? n[x-99].to_i : 0)*f+(x-98>=0? n[x-98].to_i : 0)*a+ + (x-2>=0? n[x-2].to_i : 0)*c+(x-1>=0? 
n[x-1].to_i : 0)+n[x+1].to_i+n[x+2].to_i*c+ + n[x+98].to_i*a+n[x+99].to_i*f+n[x+100].to_i+n[x+101].to_i*e+n[x+102].to_i*a+ + n[x+199].to_i*b+n[x+200].to_i*d+n[x+201].to_i*b + +#Bedingungen wann eine Zelle lebt,stirbt oder geboren wird im Moment +#sind die regeln 3 Nachbarn =Geburt und Nachbarn 3,2=Überleben +#sonst Tod + if t==3 + o[x]=1 + elsif t==2 and n[x]=1 + o[x]=1 + else + o[x]=0 + end + x+=1 +end +#wird überschrieben +n=o + +#und die Ausgabe folgt +g=%w{} +x=0 + +while x<100 + g[x]=n[100*x+1..100*x+100] + x+=1 +end +x=0 + +while x<100 + puts"#{g[x]}" + x+=1 +end + +puts"" +sleep(10) +} + +1E1E1 +puts 30.send(:/, 5) # prints 6 + +"instance variables can be #@included, #@@class_variables and #$globals as well." + +#%W[ but #@0illegal_values look strange.] + +%s#ruby allows strange#{constructs}. +%s#ruby allows strange#$constructs +%s#ruby allows strange#@@constructs + +%r\VERY STRANGE!\x00 + +~%r#<XMP>#i .. ~%r#</XMP>#i; + +a = <<"EOF" +This is a multiline here document +terminated by EOF on a line by itself +EOF + +b=(p[x] %32)/16<1 ? 0 : 1 + +<<"" +#{test} +#@bla +#die suppe!!! +\xfffff + +super <<-EOE % [ + EOE + +<<X +X +X +%s(uninter\)pre\ted) +%q(uninter\)pre\ted) +%Q(inter\)pre\ted) +:"inter\)pre\ted" +:'uninter\'pre\ted' + +%q[haha! [nesting [rocks] ! 
] ] + + +################################################################## +class NP +def initialize a=@p=[], b=@b=[]; end +def +@;@b<<1;b2c end;def-@;@b<<0;b2c end +def b2c;if @b.size==8;c=0;@b.each{|b|c<<=1;c|=b};send( + 'lave'.reverse,(@p.join))if c==0;@p<<c.chr;@b=[] end + self end end ; begin _ = NP.new end +c +# ^ This is a bug :( + +# The Programming Language `NegaPosi' ++-+--++----+--+-+++--+-------+--++--+++---+-+++-+-+-+++-----+++-_ ++--++++--+---++-+-+-+++--+--+-+------+--++++-++---++-++---++-++-_ ++++--++-+-+--++--+++--+------+----+--++--+++-++-+----++------+--_ +-+-+----+++--+--+----+--+--+-++-++--+++-++++-++-----+-+-+----++-_ +---------+-+---- _ +################################################################## + + +# date: 03/18/2004 +# title: primes less than 1000 ( 2005 Obfuscated Ruby Contest ) +# author: Jim Lawless +# email: jimbo at radiks dotski net +# comments: This program will display all positive prime integers +# less than 1000. Program licens is the same as the Ruby +# license ( http://www.ruby-lang.org/en/LICENSE.txt ) + + $e="" + +def a() + $a=$a+1 +end + +def b() + $a=$a+5 +end + +def c() + $e=$e+$a.chr +end + +def d() + $a=10 +end + +def e() + $a=$a+16 +end + +d;e;b;a;a;a;a;a;c;d;e;e;e;e;e;e;b;a;a;a;a;c;d;e;e;e;a;a;a;c;d;e;e;b;b; +a;c;d;c;d;e;e;e;e;e;e;b;b;a;a;a;c;d;e;e;e;e;e;b;b;a;a;a;a;c;d;e;e;e;e; +e;b;b;a;a;a;a;a;c;d;e;e;e;e;e;e;a;a;c;d;e;e;e;e;e;b;b;a;c;d;e;b;a;c; +d;e;b;a;a;a;a;a;c;d;e;e;e;e;e;e;b;a;a;a;a;c;d;e;e;e;a;a;c;d;e;e;b;a;a; +c;d;e;e;b;a;c;d;e;e;b;a;c;d;e;e;b;a;c;d;c;d;e;b;a;a;a;a;a;c;d;e;e;e;e;e;b; +b;a;a;a;a;a;c;d;e;e;e;a;a;a;c;d;e;e;b;a;a;a;c;d;c;d;e;b;a;a;a;a;a;c;d;e; +e;e;e;e;b;b;a;a;c;d;e;e;e;e;e;e;a;a;a;a;a;c;d;e;e;e;e;e;e;b;b;a;c; +d;e;e;e;e;e;e;a;a;a;a;c;d;e;e;e;e;e;b;a;a;a;a;a;c;d;e;e;e;a;a;a;c;d;e; +e;b;a;c;d;c;d;e;e;e;e;e;e;b;b;a;a;a;c;d;e;e;e;e;e;b;b;a;a;a;a;c;d;e;e; +e;e;e;b;b;a;a;a;a;a;c;d;e;e;e;e;e;e;a;a;c;d;e;e;e;e;e;b;b;a;c;d;e;b; 
+a;c;d;e;b;a;a;a;a;a;c;d;e;e;e;e;e;b;b;a;a;a;a;a;c;d;e;e;e;a;a;c;d;e;b; +a;a;a;a;a;c;d;e;e;e;e;e;e;b;a;a;a;a;c;d;e;b;a;c;d;c;d;e;e;e;e;e;b;b;a; +a;a;a;a;c;d;e;e;e;e;e;b;b;a;a;c;d;e;b;b;a;a;a;a;c;d;e;b;a;c;d;e;b;b;a; +a;a;a;c;d;e;b;a;a;a;a;a;c;d;e;e;e;e;e;e;b;a;a;a;a;c;d;e;e;a;a;a;a;c; +d;e;e;e;e;e;e;a;a;a;c;d;e;e;e;e;e;e;a;a;a;a;a;c;d;e;e;e;e;e;b;a;a;a; +a;a;c;d;e;e;e;e;e;e;b;b;a;c;d;e;e;e;e;e;e;a;a;c;d;e;e;e;e;e;e;a;a;a; +a;a;c;d;e;b;b;a;a;a;a;c;d;e;b;a;a;a;a;a;c;d;e;e;e;e;e;b;b;a;a;a;a;a; +c;d;e;b;b;a;a;a;a;a;c;d;e;b;b;a;a;a;a;a;c;d;e;e;e;a;a;a;c;d;e;e;e;a;a; +a;c;d;e;e;b;a;c;d;e;b;b;a;a;a;a;a;c;d;e;b;a;c;d;c;d;e;b;a;a;a;a;a;c;d;e;e; +e;e;e;b;b;a;a;c;d;e;e;e;e;e;e;a;a;a;a;a;c;d;e;e;e;e;e;e;b;b;a;c;d;e; +e;e;e;e;e;a;a;a;a;c;d;e;e;e;e;e;b;a;a;a;a;a;c;d;e;e;e;a;a;a;c;d;e;e; +b;a;a;c;d;c;d;e;e;e;e;e;b;a;a;a;c;d;e;e;e;e;e;e;b;a;a;a;c;d;e;e;e;e;e; +b;b;a;c;d;e;e;e;e;e;b;a;a;c;d;e;e;e;e;e;e;a;c;d;c;d;e;e;e;e;e;b;b;a;c; +d;e;e;e;e;e;e;a;a;a;a;c;d;e;e;e;e;e;b;a;a;a;a;a;c;d;c;d;e;b;a;a;a;a;a; +c;d;e;e;e;e;e;b;b;a;a;a;a;a;c;d;e;e;e;a;a;a;c;d;e;b;a;a;a;a;a;c;d;e;e; +e;e;e;b;b;a;a;a;a;a;c;d;e;e;a;c;d;e;e;b;a;a;c;d;c;d;e;e;e;e;e;b;b;a;a; +a;a;a;c;d;e;e;e;e;e;b;b;a;a;c;d;e;b;a;c;d;e;b;b;a;a;a;a;c;d;e;b;b;a;a; +a;a;c;d;e;b;a;a;a;a;a;c;d;e;e;e;e;e;b;b;a;a;a;a;a;c;d;e;b;b;b;a;c;d;e; +b;a;a;a;a;a;c;d;e;e;e;e;e;b;b;a;a;a;a;a;c;d;e;b;b;a;a;a;a;a;c;d;e;e; +e;a;a;a;a;c;d;e;b;a;a;a;a;a;c;d;e;e;e;e;e;e;b;a;a;a;a;c;d;e;b;b;a;a; +a;a;a;c;d;c;d;e;e;e;e;e;b;a;a;a;c;d;e;e;e;e;e;e;b;a;a;a;c;d;e;e;e;e;e; +b;b;a;c;d;e;e;e;e;e;b;a;a;c;d;e;e;e;e;e;e;a;c;d;c;d;e;e;e;e;e;b;b;a;c; +d;e;e;e;e;e;e;a;a;a;a;c;d;e;e;e;e;e;b;a;a;a;a;a;c;d;c;d;e;e;e;e;e;b;b; +a;c;d;e;e;e;e;e;e;a;a;a;a;c;d;e;e;e;e;e;b;a;a;a;a;a;c;d;c;d;e;e;e;e;e; +b;b;a;a;a;a;a;c;d;e;e;e;e;e;b;b;a;a;c;d;e;b;b;a;a;a;a;c;d;e;b;a;a;a; +a;a;c;d;e;e;e;e;e;b;b;a;a;c;d;e;e;e;e;e;e;a;a;a;a;a;c;d;e;e;e;e;e;e; +b;b;a;c;d;e;e;e;e;e;e;a;a;a;a;c;d;e;e;e;e;e;b;a;a;a;a;a;c;d;e;e;e;a; 
+a;a;c;d;e;e;e;a;a;a;c;d;e;e;b;a;c;d;e;b;b;a;a;a;a;a;c;d;e;b;a;c;d;c;d;e;e; +e;e;e;e;b;a;c;d;e;e;e;e;e;e;b;b;a;c;d;e;e;e;e;e;e;b;a;a;a;a;a;c;d;e; +e;e;e;e;e;b;a;a;a;a;c;d;e;b;a;c;d;e;b;a;a;a;c;d;e;b;a;a;a;a;c;d;e;e;e; +e;e;e;e;a;c;d;e;b;a;a;a;a;a;c;d;e;e;e;e;e;e;b;a;a;a;a;c;d;e;e;e;e;e; +e;e;a;a;a;c;d;e;b;a;c;d;e;e;e;e;e;b;b;a;a;a;a;a;c;d;e;e;e;e;e;e;b;a; +a;a;a;c;d;e;b;a;c;d;e;e;e;e;e;e;b;a;c;d;e;e;e;e;e;e;b;a;a;a;c;d;e;e;e; +e;e;b;b;a;a;a;a;a;c;d;e;e;e;e;e;e;a;a;a;c;d;e;e;e;e;e;b;b;a;c;d;e;b; +a;a;a;c;d;c;d;e;e;e;e;e;b;b;a;c;d;e;e;e;e;e;e;a;a;a;a;c;d;e;e;e;e;e;b; +a;a;a;a;a;c;d;c;d;e;b;a;a;a;a;a;c;d;e;e;e;e;e;e;b;a;a;a;a;c;d;e;b;a;c; +d;e;e;e;a;a;a;c;d;e;b;a;c;d;e;b;a;a;a;a;a;c;d;e;e;e;e;e;e;b;a;a;a;a;c; +d;e;b;a;c;d;e;e;a;c;d;e;b;a;c;d;e;e;b;a;a;c;d;c;d;e;e;e;e;e;b;b;a;c;d;e;e;e; +e;e;e;a;a;a;a;c;d;e;e;e;e;e;b;a;a;a;a;a;c;d;c;d;e;b;a;c;d;e;b;a;c;d;e;b; +a;c;d;e;b;a;c;d;e;b;a;c;d;e;b;a;c;d;e;b;a;c;eval $e + +$_=%{q,l= %w{Ruby\\ Quiz Loader} +n,p,a= "\#{q.do#{%w{w a n c}.sort{|o,t|t<=>o}}se.d\x65l\x65t\x65(' ')}.com/", +{"bmJzcA==\n".\x75np\x61ck("m")[0]=>" ","bHQ=\n".\x75np\x61ck((?n-1).chr)[0]=> +:<,"Z3Q=\n".\x75np\x61ck("m")[0]=>:>,"YW1w\n".\x75np\x61ck((?l+1).chr)[0]=>:&}, +[[/^\\s+<\\/div>.+/m,""],[/^\\s+/,""],[/\n/,"\n\n"],[/<br \\/>/,"\n"], +[/<hr \\/>/,"-="*40],[/<[^>]+>/,""],[/^ruby/,""],[/\n{3,}/,"\n\n"]];p\165ts" +\#{l[0..-3]}ing...\n\n";send(Kernel.methods.find_all{|x|x[0]==?e}[-1], +"re\#{q[5...8].downcase}re '111112101110-117114105'.scan(/-|\\\\d{3}/). +inject(''){|m,v|v.length>1?m+v.to_i.chr: m+v}");o#{%w{e P}.sort.join.downcase +}n("http://www.\#{n}"){|w|$F=w.read.sc\x61n(/li>.+?"([^"]+)..([^<]+)/)};\160uts\ +"\#{q}\n\n";$F.\145\141ch{|e|i=e[0][/\\d+/];s="%2s. %s"%[i,e[1]];i.to_i%2==0 ? +\160ut\x73(s) : #{%w{s p}[-1]}rint("%-38s "%s)};p\x72\x69\x6et"\n? ";e\x76al( +['puts"\n\#{l[0..3]}ing...\n\n"','$c=gets.chomp.to_i'].sort.join(";"));#{111.chr +}pen("http://www.\#{n}"+$F[$c-1][0]){|n|$_=n.read[/^\\s+<span.+/m];#{('a'.."z"). 
+to_a[10-5*2]}.e\141ch{|(z,f)|\x67sub!(z,f)};\147sub!(/&(\\w+);/){|y|p. +ke\171\077($1)?p[$1]:y};while$_=~/([^\n]{81,})/:z=$1.dup;f=$1.dup;f[f.rindex( +" ",80),1]="\n";f.s\165b!(/\n[ \t]+/,"\n");s\165b!(/\#{R\x65g\x65xp. +\x65scap\x65(z)}/,f)end};while\040\163ub!(/^(?:[^\n]*\n){20}/, ""):puts"\#$& +--\x4dO\x52E--";g=$_;g#{"\145"}ts;;#{"excited"[0..4].delete("c")}\040if$_[0]==?q +$_=g;end;$_.d#{"Internet Service Provider".scan(/[A-Z]/).join.downcase +}lay};eval$_ + + d=[30644250780,9003106878, + 30636278846,66641217692,4501790980, + 671_24_603036,131_61973916,66_606629_920, + 30642677916,30643069058];a,s=[],$*[0] + s.each_byte{|b|a<<("%036b"%d[b. + chr.to_i]).scan(/\d{6}/)} + a.transpose.each{ |a| + a.join.each_byte{\ + |i|print i==49?\ + ($*[1]||"#")\ + :32.chr} + puts + } + +#! /usr/bin/env ruby +# License: If Ruby is licensed to the general public in a certain way, this is also licensed in that way. +require'zlib';eval(Zlib::Inflate.inflate("x\332\355WKo\333F\020\276\367W\250\262\001\222\tM\357\246M\017\242\211\242h\200\036\212`\201\026\350\205`\f=h\233\301Zt%\273A-2\277\275\363\315\222\334\241,#v\214\366T\331\262\326\303y\3177\263\243M\371\347]\265)\203UuYnoO\257Wo\203\364>[T\353U\265\276L\257\353\325\235-'\277\226\233ui\323Uy1\251\027\027\341\253\371\346r\e\245u\366\216\205f\263\367\357\336&\353\362S\010zr=\277\3315w\315]r[\237o\333\344c]\255#>\343O\025\352\037\334\177\341\367\364\271\t\003\245\337|\027\304\364aM@:\363\260\316>\237\232\323(\326\252(\327\253\t\275\323\332h\253\224V\306d\247\037\362\371\311}\321\314f\356\363C\016\311\342\365\361ij\026\037\313\345\355\3577\363e\231\224\363\345\325y\315\204]\263l\3620\177\317\241\024M\376\263\235o\267Et\222/\223%\037\213\374D\323\373M\3214Kv-\373<\361\026\233&\\\304\253,\354\270\263\314)\232\3748\311\247]z\216v\3136\235\306\323\243\035\262\263\214\332\f\024\342\257\327\345\264\230\205\313o36\3122\254e2\260\236\2610\202\354\037\260\256 
(f=/\313:Z\024\245\313\244Zoo\347\353ey~]\336^\325\253-\a\273k\252fqv6\235\333j\276\355\236tV\252\230\377F\276\n\333\277\257\241\345\206\262\323\306G\273\352\340\203t\332\246\2441`'\316\316\266\245\275H\0032\377l\253\017,=42E\002\360\236\246\345_s;Y\274^\305\367Q\233\036\233\276\016\312\2450=\256=\305U\202\230\254\"\222\265\004\217\237~\373\345\017\"h\243\210\307j\235\251\205V8\353\304X\372!1CGc-\251\240\337\020\317\361#\036\023\n\2556\254Cg3\002}\265\356s\235\202K[K\022\020 \243\206\216\241p3\33255\350\232\036\030q$\233\344!\363\204^},$\023Xg\235:\364r1\"1\344\277\261\207\031(\301DE\260\344\026Y\177\345\036\221\204mP\263\266Mk\305\366\210%3\220\302S\322\306IR\316\377!\203 S\336\310\216\215\203\315\002-\211 5D2\257\210\302\321p\234\364\205\222Jj\220\022E\321h\347\223RQ*94K\022\243\314H`4{LV\003\021N\f\333\364I\347l\327UR\305t\340\332i>\241x=Mu4R\245\373\223\244\251NB\211\247\236\3465\253^bx\332Yc\263\252M\220b\253\220\310\004\331\242\020,`\005T\021Y\251P@\020\365Ax\310z\364\264\240\265vj2\037?0\v\"en\244\374\251\032\225\253v\346\253\3712\215\032\322(o\206~A\006\010\f\324\22357\026\"\316\024\365\021\360@\277:\363.$\f\342\016$\200\v\341\302\230\020\340\341\201K\017\270+i\326-\312\313j\235\n[\376({\330u\254\266\334\034\031\367%:CK\210{\311h\aQH\333Q\023\250\210;e\360\322\362\213\202\247\216\266\340C&(p\274HT7\336&B\352\300\036z\206\204\375 \032z\304\233\217\034\267AK\207R\363\213\324u\334\203\272h\234 \304&\364S\302]|\024\233b\000\023E\034\005\300!\330\2274\026\205\316\363\203\364\"\316\245!\242\360Y?4\204b\023.\2009\036X\300\213p\200]\304\324\200$^\204\025\222D\325X \363\324\004\223\205\207\241M\245\352\341(s\3415\260w\226\313=\2422 
\200\177\344\355\211\3350\004\341\217\207\215r%x\030\302\304\230\335{#\250#o\204h\327;\220\242\275B%j&\343e\005\226/\r\200\035\035\206K\243\027\216Z\230\323.\335\356^!\vF\002K\366\246kG\321\364E\301\362\250\275a\f\031\207i%\216\342&ie\205\260\324}\272\252ho\222\306\370\362!}6\364C\003\2717\206'!.\315\036mhMm\370\252\241\365\221g\275\326A\302\254\270X,\371\353\232:\222\321\253\025\217v%\222\023!\243r\272\364(\376\177\236\374\233\363\3048\330b\241xdTp\325\321\377\3428F\234\214\263\357\255f\324\306\226\257\022\"\000\354\003\024C\207\na\353\240&O\305\376\004ncy\350\f\276\357+Q|\201bBi\206\277\345u\251\273\310\367\242\303*\204d\n\271}\016\2345r8\034\201[\343:>\364*\242\266\025+HZ\263e\212\0247q\357\310X\267[\333(9_o}P\201\324>\266\364\000\217hh\352\225a\213q\260\031\334\022sg\360\e\206\234B=\246\2421\341e\364\270\321\224\347\0056L\267\227)\244\210\307\027\257<\343\257\000\303\264u{\235\326\352i\303^\332\200\n\236\243a\277\034J#~S\335'2\371\001q\3745$\356\027^\371\325\344\331\036\362\004\267\330\251<\212\237\257\345kr\371\302d\362r\376\344d\252C\311\374R6\017e\375\005\271yAV\363/\257\345\261(\340hW\020\222\a\027k)60\354\217\363\3501\263rt\0364\025\025|\265\031\355\276d\357\3159\367\225\025\223U\273n\027\324\321H\031\030\036\357\356\377\010\266\337\374\003\3375Q\335")) +#include "ruby.h" /* + /sLaSh * + oBfUsCaTeD RuBy * + cOpYrIgHt 2005 * +bY SiMoN StRaNdGaArD * + #{X=320;Y=200;Z=20} */ + +#define GUN1 42: +#define GUN2 43: +#define bo do +#define when(gun) /**/ +#define DATA "p 'Hello embedded world'" +#define DIRTY(argc,argv)\ +argc,argv,char=eval(\ +"#{DATA.read}\n[3,2,1]"\ +);sun=O.new\ +if(0) + +int +sun[]={12,9,16,9,2,1,7,1,3,9,27,4, 13,2,11,5,4,1,25, +5,0,1,14,9,15,4,26,9,23,2,17,6,31, 6,10,8,22,9,21,1, +24,8,20,8,18,9,29,5,9,5,1,1,28,8,8,1,30, 9,6,8, 5,1, +19,9,36,19,43, 9,34,11,50,19,48,18,49,9, 35,8,42,18, +51,8,44,11,32, 11,47,9,37,1,39,9,38,19, 45,8,40,12, +41,9,46,12,33,1,57,1,85,5,88,28,83,4,87, 6,62,28,89, 
+9,80,28,60,21,52,21,72,29,54,21,75,8,70,29,58,28,65, +9,91,8,74,29,79,2,77,1,53,1,81,5, 69,2,64,21, 86,29, +67,9,59,1,61,5,73,6,76,28,56,21,68,29,78,29,63,5,66, +28,90,29, 71,4,55,9,84,28,82,29,101,5, 103,9, 98,35, +97,1,94,35,93,1,100,35,92,31,99,5,96,39,95,5,102,35}; + +void run(int gun=0) { // [gun]=[:GUN1,:GUN2] + printf("run() %i\n", gun); + switch(gun) { + case GUN1 when(2) + printf("when2\n"); + break; // end + case GUN2 when(3) + printf("when3\n"); + break; // end + } +} + +int main(int argc, char** argv) { + printf("hello world. number of arguments=%i\n", argc); + int fun=5; + bo { + fun -= 1; //.id - gun = fun + run(fun); + } while(fun>0); + ruby_init(); + rb_eval_string(DATA); + return 0; +} + +#if 0 // nobody reads un-defined code +__END__ +#CODE +def goto*s;$s=[];Y.times{s=[];X.times{s<<[0]*3};$s<< s}end;A=0.5 +include Math;def u g,h,i,j,k,l;f,*m=((j-h).abs>(k-i).abs)?[proc{ +|n,o| g[o] [n ]=l },[h ,i ],[j,k]]:[proc{ +|p,q| g[ p][ q] =l} ,[ i,h ], [k,j]];b,a=m.sort +c,d=a [1 ]-b [1 ],a [0 ]-b [0 ];d.times{|e|f. +call( e+b[ 0] ,c* e/d+b [1])};end;V=0;def bo&u +$u||= V; ;$u += 1+V ;; return u.call if$u>1;q=128.0 +;x=(V .. 255 ). map {| y|f1,z =sin(y.to_f*PI/q), +sin(( y. to_f + 200 )*PI/( q));[(f1*30.0+110.0). +to_i,((f1+z)*10.0+40.0).to_i,(z*20.0+120.0).to_i]};Y.times{|i|X. +times{|j|i1=((i*0.3+150)*(j*1.1+50)/50.0).to_i;i2=((i*0.8+510)*( +j*0.9+1060)/51.0).to_i;$s[i][j]=x[(i1*i2)%255].clone}};$a=(0..25). +inject([]){|a,i|a<<(V..3).inject([]){|r,j|r<<$c[i*4+j]}};u.call;end +I=LocalJumpError;def run*a,&b;return if a.size==V;if a[V]==666;$b=b +elsif$b;$b.call;end;end;def main s,&u;$m=V;u.call rescue I;end +def rb_eval_string(*a);end # you promised not to look here +def ruby_init;q=2.0;l=((X**q)*A+(Y**q)*A)**A;V.upto(Y-4){|s|V. 
+upto(X-4){|q|d=((q-X/A)**q+(s-Y/A)**q)**A;e=(cos(d*PI/(l/q))/q ++A)*3.0+1.0;v=2;f=v/e;a,p,b=$s[s],$s[s+1],$s[s+v];r=a[q][V]*e+ +p[q][V]+a[q+1][V]+b[q][V]+a[q+v][V]+b[q+v/v][V]+p[q+v][V]+b[q+ +v][V]*f;g=[a[q][V],b[q][V],a[q+v][V],b[q+v][V]];h=(g.max-g.min +)*f;$s[s][q][V]=[[(r/(e+f+6.0)+A+(h*0.4)).to_i,255].min,V].max +}};File.open("res.ppm","w+"){|f|f.write(# secret.greetings :-) +"P3\n# res.ppm\n#{X} #{Y}\n255\n"+$s.map{|a|a.map{|b|b.join' ' +}.join(' ')+"\n"}.join)};end;def switch i,&b;b.call;return unless +defined?($m);b=(X*0.01).to_i;d=1.0/40.0;e=0.09;c=(Y*0.01).to_i +a=$a.map{|(f,g,h,j)|[f*d,g*e,h*d,j*e]};a.each{|(k,l,m,n)|u($s,(k*X +).to_i+b+i,(l*Y).to_i+c+i,(m*X).to_i+b+i,(n*Y).to_i+c+i,[Z]*3)} +a.each{|(o,q,r,s)|u($s,(o*(X-Z)).to_i+i,(q*(Y-Z)).to_i+i,(r*(X- +Z)).to_i+i,(s*(Y-Z)).to_i+i,[(1<<8)-1]*3)};end;Q=Object;class +Regexp;def []=(v,is);is.each{|s|Q.send(:remove_const,s)if Q. +const_defined? s;Q.const_set(s,v)};end;end;def int*ptr;666 +end;class O;def []=(a,b=nil);$c=a;end;end;alias:void:goto +#endif // pretend as if you havn't seen anything + +module CodeRay + module Scanners + +class Ruby < Scanner + + RESERVED_WORDS = [ + 'and', 'def', 'end', 'in', 'or', 'unless', 'begin', + 'defined?', 'ensure', 'module', 'redo', 'super', 'until', + 'BEGIN', 'break', 'do', 'next', 'rescue', 'then', + 'when', 'END', 'case', 'else', 'for', 'retry', + 'while', 'alias', 'class', 'elsif', 'if', 'not', 'return', + 'undef', 'yield', + ] + + DEF_KEYWORDS = ['def'] + MODULE_KEYWORDS = ['class', 'module'] + DEF_NEW_STATE = WordList.new(:initial). + add(DEF_KEYWORDS, :def_expected). + add(MODULE_KEYWORDS, :module_expected) + + WORDS_ALLOWING_REGEXP = [ + 'and', 'or', 'not', 'while', 'until', 'unless', 'if', 'elsif', 'when' + ] + REGEXP_ALLOWED = WordList.new(false). + add(WORDS_ALLOWING_REGEXP, :set) + + PREDEFINED_CONSTANTS = [ + 'nil', 'true', 'false', 'self', + 'DATA', 'ARGV', 'ARGF', '__FILE__', '__LINE__', + ] + + IDENT_KIND = WordList.new(:ident). 
+ add(RESERVED_WORDS, :reserved). + add(PREDEFINED_CONSTANTS, :pre_constant) + + METHOD_NAME = / #{IDENT} [?!]? /xo + METHOD_NAME_EX = / + #{METHOD_NAME} # common methods: split, foo=, empty?, gsub! + | \*\*? # multiplication and power + | [-+~]@? # plus, minus + | [\/%&|^`] # division, modulo or format strings, &and, |or, ^xor, `system` + | \[\]=? # array getter and setter + | <=?>? | >=? # comparison, rocket operator + | << | >> # append or shift left, shift right + | ===? # simple equality and case equality + /ox + GLOBAL_VARIABLE = / \$ (?: #{IDENT} | \d+ | [~&+`'=\/,;_.<>!@0$?*":F\\] | -[a-zA-Z_0-9] ) /ox + + DOUBLEQ = / " [^"\#\\]* (?: (?: \#\{.*?\} | \#(?:$")? | \\. ) [^"\#\\]* )* "? /ox + SINGLEQ = / ' [^'\\]* (?: \\. [^'\\]* )* '? /ox + STRING = / #{SINGLEQ} | #{DOUBLEQ} /ox + SHELL = / ` [^`\#\\]* (?: (?: \#\{.*?\} | \#(?:$`)? | \\. ) [^`\#\\]* )* `? /ox + REGEXP = / \/ [^\/\#\\]* (?: (?: \#\{.*?\} | \#(?:$\/)? | \\. ) [^\/\#\\]* )* \/? /ox + + DECIMAL = /\d+(?:_\d+)*/ # doesn't recognize 09 as octal error + OCTAL = /0_?[0-7]+(?:_[0-7]+)*/ + HEXADECIMAL = /0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*/ + BINARY = /0b[01]+(?:_[01]+)*/ + + EXPONENT = / [eE] [+-]? #{DECIMAL} /ox + FLOAT = / #{DECIMAL} (?: #{EXPONENT} | \. #{DECIMAL} #{EXPONENT}? ) / + INTEGER = /#{OCTAL}|#{HEXADECIMAL}|#{BINARY}|#{DECIMAL}/ + + def reset + super + @regexp_allowed = false + end + + def next_token + return if @scanner.eos? + + kind = :error + if @scanner.scan(/\s+/) # in every state + kind = :space + @regexp_allowed = :set if @regexp_allowed or @scanner.matched.index(?\n) # delayed flag setting + + elsif @state == :def_expected + if @scanner.scan(/ (?: (?:#{IDENT}(?:\.|::))* | (?:@@?|$)? 
#{IDENT}(?:\.|::) ) #{METHOD_NAME_EX} /ox) + kind = :method + @state = :initial + else + @scanner.getch + end + @state = :initial + + elsif @state == :module_expected + if @scanner.scan(/<</) + kind = :operator + else + if @scanner.scan(/ (?: #{IDENT} (?:\.|::))* #{IDENT} /ox) + kind = :method + else + @scanner.getch + end + @state = :initial + end + + elsif # state == :initial + # IDENTIFIERS, KEYWORDS + if @scanner.scan(GLOBAL_VARIABLE) + kind = :global_variable + elsif @scanner.scan(/ @@ #{IDENT} /ox) + kind = :class_variable + elsif @scanner.scan(/ @ #{IDENT} /ox) + kind = :instance_variable + elsif @scanner.scan(/ __END__\n ( (?!\#CODE\#) .* )? | \#[^\n]* | =begin(?=\s).*? \n=end(?=\s|\z)(?:[^\n]*)? /mx) + kind = :comment + elsif @scanner.scan(METHOD_NAME) + if @last_token_dot + kind = :ident + else + matched = @scanner.matched + kind = IDENT_KIND[matched] + if kind == :ident and matched =~ /^[A-Z]/ + kind = :constant + elsif kind == :reserved + @state = DEF_NEW_STATE[matched] + @regexp_allowed = REGEXP_ALLOWED[matched] + end + end + + elsif @scanner.scan(STRING) + kind = :string + elsif @scanner.scan(SHELL) + kind = :shell + elsif @scanner.scan(/<< + (?: + ([a-zA-Z_0-9]+) + (?: .*? ^\1$ | .* ) + | + -([a-zA-Z_0-9]+) + (?: .*? ^\s*\2$ | .* ) + | + (["\'`]) (.+?) \3 + (?: .*? ^\4$ | .* ) + | + - (["\'`]) (.+?) \5 + (?: .*? 
^\s*\6$ | .* ) + ) + /mxo) + kind = :string + elsif @scanner.scan(/\//) and @regexp_allowed + @scanner.unscan + @scanner.scan(REGEXP) + kind = :regexp +/%(?:[Qqxrw](?:\([^)#\\\\]*(?:(?:#\{.*?\}|#|\\\\.)[^)#\\\\]*)*\)?|\[[^\]#\\\\]*(?:(?:#\{.*?\}|#|\\\\.)[^\]#\\\\]*)*\]?|\{[^}#\\\\]*(?:(?:#\{.*?\}|#|\\\\.)[^}#\\\\]*)*\}?|<[^>#\\\\]*(?:(?:#\{.*?\}|#|\\\\.)[^>#\\\\]*)*>?|([^a-zA-Z\\\\])(?:(?!\1)[^#\\\\])*(?:(?:#\{.*?\}|#|\\\\.)(?:(?!\1)[^#\\\\])*)*\1?)|\([^)#\\\\]*(?:(?:#\{.*?\}|#|\\\\.)[^)#\\\\]*)*\)?|\[[^\]#\\\\]*(?:(?:#\{.*?\}|#|\\\\.)[^\]#\\\\]*)*\]?|\{[^}#\\\\]*(?:(?:#\{.*?\}|#|\\\\.)[^}#\\\\]*)*\}?|<[^>#\\\\]*(?:(?:#\{.*?\}|#|\\\\.)[^>#\\\\]*)*>?|([^a-zA-Z\s\\\\])(?:(?!\2)[^#\\\\])*(?:(?:#\{.*?\}|#|\\\\.)(?:(?!\2)[^#\\\\])*)*\2?|\\\\[^#\\\\]*(?:(?:#\{.*?\}|#)[^#\\\\]*)*\\\\?)/ + elsif @scanner.scan(/:(?:#{GLOBAL_VARIABLE}|#{METHOD_NAME_EX}|#{STRING})/ox) + kind = :symbol + elsif @scanner.scan(/ + \? (?: + [^\s\\] + | + \\ (?:M-\\C-|C-\\M-|M-\\c|c\\M-|c|C-|M-))? (?: \\ (?: . | [0-7]{3} | x[0-9A-Fa-f][0-9A-Fa-f] ) + ) + /mox) + kind = :integer + + elsif @scanner.scan(/ [-+*\/%=<>;,|&!()\[\]{}~?] | \.\.?\.? | ::? /x) + kind = :operator + @regexp_allowed = :set if @scanner.matched[-1,1] =~ /[~=!<>|&^,\(\[+\-\/\*%]\z/ + elsif @scanner.scan(FLOAT) + kind = :float + elsif @scanner.scan(INTEGER) + kind = :integer + else + @scanner.getch + end + end + + token = Token.new @scanner.matched, kind + + if kind == :regexp + token.text << @scanner.scan(/[eimnosux]*/) + end + + @regexp_allowed = (@regexp_allowed == :set) # delayed flag setting + + token + end +end + +register Ruby, 'ruby', 'rb' + + end +end +class Set + include Enumerable + + # Creates a new set containing the given objects. + def self.[](*ary) + new(ary) + end + + # Creates a new set containing the elements of the given enumerable + # object. + # + # If a block is given, the elements of enum are preprocessed by the + # given block. 
def initialize(enum = nil, &block) # :yields: o
  # Keep a store that is already present (the SortedSet variant further down
  # in this file assigns @hash before calling super).
  @hash ||= Hash.new

  return if enum.nil?

  if block
    enum.each { |o| add(block[o]) }
  else
    merge(enum)
  end
end

# Copy internal hash, so that a dup/clone does not share storage with the
# original.
def initialize_copy(orig)
  @hash = orig.instance_eval { @hash }.dup
end

# Returns the number of elements.
def size
  @hash.size
end
alias length size

# Returns true if the set contains no elements.
def empty?
  @hash.empty?
end

# Removes all elements and returns self.
def clear
  @hash.clear
  self
end

# Replaces the contents of the set with the contents of the given
# enumerable object and returns self.
#
# Raises ArgumentError if +enum+ is not Enumerable.
def replace(enum)
  if enum.class == self.class
    # Same class: copy the other set's hash wholesale.
    @hash.replace(enum.instance_eval { @hash })
  else
    raise ArgumentError, "value must be enumerable" unless enum.is_a?(Enumerable)
    clear
    enum.each { |o| add(o) }
  end

  self
end

# Converts the set to an array.  The order of elements is uncertain.
def to_a
  @hash.keys
end

# Adds the elements of +set+ to self, descending into nested sets.
# +seen+ holds the object ids of the sets currently being descended into;
# meeting one of them again means the structure is recursive.
#
# Raises ArgumentError on a recursive set.  Returns self.
def flatten_merge(set, seen = Set.new)
  set.each { |e|
    if e.is_a?(Set)
      if seen.include?(e_id = e.object_id)
        raise ArgumentError, "tried to flatten recursive Set"
      end

      seen.add(e_id)
      flatten_merge(e, seen)
      seen.delete(e_id)
    else
      add(e)
    end
  }

  self
end
protected :flatten_merge

# Returns a new set that is a copy of the set, flattening each
# containing set recursively.
def flatten
  self.class.new.flatten_merge(self)
end

# Equivalent to Set#flatten, but replaces the receiver with the
# result in place.  Returns nil if no modifications were made.
def flatten!
  if detect { |e| e.is_a?(Set) }
    replace(flatten())
  else
    nil
  end
end

# Returns true if the set contains the given object.
def include?(o)
  @hash.include?(o)
end
alias member? include?

# Returns true if the set is a superset of the given set.
# Raises ArgumentError if +set+ is not a Set.
def superset?(set)
  raise ArgumentError, "value must be a set" unless set.is_a?(Set)
  return false if size < set.size
  set.all? { |o| include?(o) }
end

# Returns true if the set is a proper superset of the given set.
# Raises ArgumentError if +set+ is not a Set.
def proper_superset?(set)
  raise ArgumentError, "value must be a set" unless set.is_a?(Set)
  return false if size <= set.size
  set.all? { |o| include?(o) }
end

# Returns true if the set is a subset of the given set.
# Raises ArgumentError if +set+ is not a Set.
def subset?(set)
  raise ArgumentError, "value must be a set" unless set.is_a?(Set)
  return false if set.size < size
  all? { |o| set.include?(o) }
end

# Returns true if the set is a proper subset of the given set.
# Raises ArgumentError if +set+ is not a Set.
def proper_subset?(set)
  raise ArgumentError, "value must be a set" unless set.is_a?(Set)
  return false if set.size <= size
  all? { |o| set.include?(o) }
end

# Calls the given block once for each element in the set, passing
# the element as parameter.  Returns self.
def each
  @hash.each_key { |o| yield(o) }
  self
end

# Adds the given object to the set and returns self.  Use +merge+ to
# add several elements at once.
def add(o)
  @hash[o] = true
  self
end
alias << add

# Adds the given object to the set and returns self.  If the
# object is already in the set, returns nil.
def add?(o)
  if include?(o)
    nil
  else
    add(o)
  end
end

# Deletes the given object from the set and returns self.  Use +subtract+ to
# delete several items at once.
def delete(o)
  @hash.delete(o)
  self
end

# Deletes the given object from the set and returns self.  If the
# object is not in the set, returns nil.
def delete?(o)
  if include?(o)
    delete(o)
  else
    nil
  end
end

# Deletes every element of the set for which block evaluates to
# true, and returns self.
def delete_if
  # The hash yields key and value; only the key (the element) is passed on.
  @hash.delete_if { |o,| yield(o) }
  self
end

# Do collect() destructively: replaces every element with the block's
# result for it and returns self.
def collect!
  set = self.class.new
  each { |o| set << yield(o) }
  replace(set)
end
alias map! collect!

# Equivalent to Set#delete_if, but returns nil if no changes were
# made.
def reject!
  n = size
  delete_if { |o| yield(o) }
  size == n ? nil : self
end

# Merges the elements of the given enumerable object to the set and
# returns self.
#
# Raises ArgumentError if +enum+ is not Enumerable.
def merge(enum)
  if enum.is_a?(Set)
    # Another set: merge its hash directly instead of adding one by one.
    @hash.update(enum.instance_eval { @hash })
  else
    raise ArgumentError, "value must be enumerable" unless enum.is_a?(Enumerable)
    enum.each { |o| add(o) }
  end

  self
end

# Deletes every element that appears in the given enumerable object
# and returns self.
#
# Raises ArgumentError if +enum+ is not Enumerable.
def subtract(enum)
  raise ArgumentError, "value must be enumerable" unless enum.is_a?(Enumerable)
  enum.each { |o| delete(o) }
  self
end

# Returns a new set built by merging the set and the elements of the
# given enumerable object.
def |(enum)
  raise ArgumentError, "value must be enumerable" unless enum.is_a?(Enumerable)
  dup.merge(enum)
end
alias + |		##
alias union |		##

# Returns a new set built by duplicating the set, removing every
# element that appears in the given enumerable object.
def -(enum)
  raise ArgumentError, "value must be enumerable" unless enum.is_a?(Enumerable)
  dup.subtract(enum)
end
alias difference -	##

# Returns a new set containing elements common to the set and the
# given enumerable object.
def &(enum)
  raise ArgumentError, "value must be enumerable" unless enum.is_a?(Enumerable)
  n = self.class.new
  enum.each { |o| n.add(o) if include?(o) }
  n
end
alias intersection &	##

# Returns a new set containing elements exclusive between the set
# and the given enumerable object.  (set ^ enum) is equivalent to
# ((set | enum) - (set & enum)).
def ^(enum)
  raise ArgumentError, "value must be enumerable" unless enum.is_a?(Enumerable)
  n = dup
  enum.each { |o| if n.include?(o) then n.delete(o) else n.add(o) end }
  n
end

# Returns true if two sets are equal.  The equality of each couple
# of elements is defined according to Object#eql?.
def ==(set)
  return true if equal?(set)
  return false unless set.is_a?(Set) && size == set.size

  # Sizes match, so it is enough that every element of +set+ is in self.
  # The membership test only reads @hash, so no defensive copy is needed.
  set.all? { |o| @hash.include?(o) }
end

def hash	# :nodoc:
  @hash.hash
end

def eql?(o)	# :nodoc:
  return false unless o.is_a?(Set)
  @hash.eql?(o.instance_eval { @hash })
end

# Classifies the set by the return value of the given block and
# returns a hash of {value => set of elements} pairs.  The block is
# called once for each element of the set, passing the element as
# parameter.
#
# e.g.:
#
#   require 'set'
#   files = Set.new(Dir.glob("*.rb"))
#   hash = files.classify { |f| File.mtime(f).year }
#   p hash    # => {2000=>#<Set: {"a.rb", "b.rb"}>,
#             #     2001=>#<Set: {"c.rb", "d.rb", "e.rb"}>,
#             #     2002=>#<Set: {"f.rb"}>}
def classify # :yields: o
  h = {}

  each { |i|
    x = yield(i)
    (h[x] ||= self.class.new).add(i)
  }

  h
end

# Divides the set into a set of subsets according to the commonality
# defined by the given block.
#
# If the arity of the block is 2, elements o1 and o2 are in common
# if block.call(o1, o2) is true.  Otherwise, elements o1 and o2 are
# in common if block.call(o1) == block.call(o2).
#
# e.g.:
#
#   require 'set'
#   numbers = Set[1, 3, 4, 6, 9, 10, 11]
#   set = numbers.divide { |i,j| (i - j).abs == 1 }
#   p set     # => #<Set: {#<Set: {1}>,
#             #            #<Set: {11, 9, 10}>,
#             #            #<Set: {3, 4}>,
#             #            #<Set: {6}>}>
def divide(&func)
  if func.arity == 2
    require 'tsort'

    # dig maps each element to the elements it is "in common" with; the
    # singleton methods below let TSort walk that hash as a graph.
    class << dig = {}		# :nodoc:
      include TSort

      alias tsort_each_node each_key
      def tsort_each_child(node, &block)
        fetch(node).each(&block)
      end
    end

    each { |u|
      dig[u] = a = []
      each { |v| func.call(u, v) and a << v }
    }

    # Every strongly connected component becomes one subset.
    set = Set.new()
    dig.each_strongly_connected_component { |css|
      set.add(self.class.new(css))
    }
    set
  else
    Set.new(classify(&func).values)
  end
end

InspectKey = :__inspect_key__         # :nodoc:

# Returns a string containing a human-readable representation of the
# set.
("#<Set: {element1, element2, ...}>") + def inspect + ids = (Thread.current[InspectKey] ||= []) + + if ids.include?(object_id) + return sprintf('#<%s: {...}>', self.class.name) + end + + begin + ids << object_id + return sprintf('#<%s: {%s}>', self.class, to_a.inspect[1..-2]) + ensure + ids.pop + end + end + + def pretty_print(pp) # :nodoc: + pp.text sprintf('#<%s: {', self.class.name) + pp.nest(1) { + pp.seplist(self) { |o| + pp.pp o + } + } + pp.text "}>" + end + + def pretty_print_cycle(pp) # :nodoc: + pp.text sprintf('#<%s: {%s}>', self.class.name, empty? ? '' : '...') + end +end + +# SortedSet implements a set which elements are sorted in order. See Set. +class SortedSet < Set + @@setup = false + + class << self + def [](*ary) # :nodoc: + new(ary) + end + + def setup # :nodoc: + @@setup and return + + begin + require 'rbtree' + + module_eval %{ + def initialize(*args, &block) + @hash = RBTree.new + super + end + } + rescue LoadError + module_eval %{ + def initialize(*args, &block) + @keys = nil + super + end + + def clear + @keys = nil + super + end + + def replace(enum) + @keys = nil + super + end + + def add(o) + @keys = nil + @hash[o] = true + self + end + alias << add + + def delete(o) + @keys = nil + @hash.delete(o) + self + end + + def delete_if + n = @hash.size + @hash.delete_if { |o,| yield(o) } + @keys = nil if @hash.size != n + self + end + + def merge(enum) + @keys = nil + super + end + + def each + to_a.each { |o| yield(o) } + end + + def to_a + (@keys = @hash.keys).sort! unless @keys + @keys + end + } + end + + @@setup = true + end + end + + def initialize(*args, &block) # :nodoc: + SortedSet.setup + initialize(*args, &block) + end +end + +module Enumerable + # Makes a set from the enumerable object with given arguments. + def to_set(klass = Set, *args, &block) + klass.new(self, *args, &block) + end +end + +# =begin +# == RestricedSet class +# RestricedSet implements a set with restrictions defined by a given +# block. 
+# +# === Super class +# Set +# +# === Class Methods +# --- RestricedSet::new(enum = nil) { |o| ... } +# --- RestricedSet::new(enum = nil) { |rset, o| ... } +# Creates a new restricted set containing the elements of the given +# enumerable object. Restrictions are defined by the given block. +# +# If the block's arity is 2, it is called with the RestrictedSet +# itself and an object to see if the object is allowed to be put in +# the set. +# +# Otherwise, the block is called with an object to see if the object +# is allowed to be put in the set. +# +# === Instance Methods +# --- restriction_proc +# Returns the restriction procedure of the set. +# +# =end +# +# class RestricedSet < Set +# def initialize(*args, &block) +# @proc = block or raise ArgumentError, "missing a block" +# +# if @proc.arity == 2 +# instance_eval %{ +# def add(o) +# @hash[o] = true if @proc.call(self, o) +# self +# end +# alias << add +# +# def add?(o) +# if include?(o) || !@proc.call(self, o) +# nil +# else +# @hash[o] = true +# self +# end +# end +# +# def replace(enum) +# enum.is_a?(Enumerable) or raise ArgumentError, "value must be enumerable" +# clear +# enum.each { |o| add(o) } +# +# self +# end +# +# def merge(enum) +# enum.is_a?(Enumerable) or raise ArgumentError, "value must be enumerable" +# enum.each { |o| add(o) } +# +# self +# end +# } +# else +# instance_eval %{ +# def add(o) +# if @proc.call(o) +# @hash[o] = true +# end +# self +# end +# alias << add +# +# def add?(o) +# if include?(o) || !@proc.call(o) +# nil +# else +# @hash[o] = true +# self +# end +# end +# } +# end +# +# super(*args) +# end +# +# def restriction_proc +# @proc +# end +# end + +if $0 == __FILE__ + eval DATA.read, nil, $0, __LINE__+4 +end + +# = rweb - CGI Support Library +# +# Author:: Johannes Barre (mailto:rweb@igels.net) +# Copyright:: Copyright (c) 2003, 04 by Johannes Barre +# License:: GNU Lesser General Public License (COPYING, http://www.gnu.org/copyleft/lesser.html) +# Version:: 0.1.0 +# CVS-ID:: $Id: 
rweb.rb 6 2004-06-16 15:56:26Z igel $ +# +# == What is Rweb? +# Rweb is a replacement for the cgi class included in the Ruby distribution. +# +# == How to use +# +# === Basics +# +# This class is made to be as easy as possible to use. An example: +# +# require "rweb" +# +# web = Rweb.new +# web.out do +# web.puts "Hello world!" +# end +# +# The visitor will get a simple "Hello world!" in their browser. Please note +# that Rweb won't set HTML tags for you, so you had better do something like this: +# +# require "rweb" +# +# web = Rweb.new +# web.out do +# web.puts "<html><body>Hello world!</body></html>" +# end +# +# === Set headers +# Of course, it's also possible to tell the browser that the content of this +# page is plain text instead of HTML code: +# +# require "rweb" +# +# web = Rweb.new +# web.out do +# web.header("content-type: text/plain") +# web.puts "Hello plain world!" +# end +# +# Please remember, headers can't be set after the page content has been sent. +# You have to set all necessary headers before the first puts or print. It's +# possible to cache the content until everything is complete. Doing it this +# way, you can set headers everywhere. +# +# If you set a header twice, the second header will replace the first one. The +# header name is not case-sensitive; it will always be converted into the +# capitalised form suggested by the W3C (http://w3.org) +# +# === Set cookies +# Setting cookies is quite easy: +# require "rweb" +# +# web = Rweb.new +# Cookie.new("Visits", web.cookies['visits'].to_i +1) +# web.out do +# web.puts "Welcome back! You visited this page #{web.cookies['visits'].to_i +1} times" +# end +# +# See the class Cookie for more details. +# +# === Get form and cookie values +# There are four ways to submit data from the browser to the server and your +# Ruby script: via GET, POST, cookies and file upload. Rweb does not support +# file upload yet.
+# +# include 'rweb' +# +# web = Rweb.new +# web.out do +# web.print "action: #{web.get['action']} " +# web.puts "The value of the cookie 'visits' is #{web.cookies['visits']}" +# web.puts "The post parameter 'test['x']' is #{web.post['test']['x']}" +# end + +RWEB_VERSION = "0.1.0" +RWEB = "rweb/#{RWEB_VERSION}" + +#require 'rwebcookie' -> edit by bunny :-) + +class Rweb + # All parameter submitted via the GET method are available in attribute + # get. This is Hash, where every parameter is available as a key-value + # pair. + # + # If your input tag has a name like this one, it's value will be available + # as web.get["fieldname"] + # <input name="fieldname"> + # You can submit values as a Hash + # <input name="text['index']"> + # <input name="text['index2']"> + # will be available as + # web.get["text"]["index"] + # web.get["text"]["index2"] + # Integers are also possible + # <input name="int[2]"> + # <input name="int[3]['hi']> + # will be available as + # web.get["int"][2] + # web.get["int"][3]["hi"] + # If you specify no index, the lowest unused index will be used: + # <input name="int[]"><!-- First Field --> + # <input name="int[]"><!-- Second one --> + # will be available as + # web.get["int"][0] # First Field + # web.get["int"][1] # Second one + # Please notice, this doesn'd work like you might expect: + # <input name="text[index]"> + # It will not be available as web.get["text"]["index"] but + # web.get["text[index]"] + attr_reader :get + + # All parameters submitted via POST are available in the attribute post. It + # works like the get attribute. + # <input name="text[0]"> + # will be available as + # web.post["text"][0] + attr_reader :post + + # All cookies submitted by the browser are available in cookies. This is a + # Hash, where every cookie is a key-value pair. + attr_reader :cookies + + # The name of the browser identification is submitted as USER_AGENT and + # available in this attribute. 
attr_reader :user_agent

# The IP address of the client.
attr_reader :remote_addr

# Creates a new Rweb object. This should only be done once. You can set
# various options via the settings hash.
#
# "cache" => true: Everything your script sends to the client will be cached
# until the end of the out block or until flush is called. This way, you
# can modify headers and cookies even after printing something to the client.
#
# "safe" => level: Changes the $SAFE attribute. By default, $SAFE will be set
# to 1. If $SAFE is already higher than this value, it won't be changed.
# On interpreters where $SAFE is not an Integer the setting is ignored.
#
# "silend" => true: Normally, Rweb automatically adds a header like this:
# "X-Powered-By: Rweb/x.x.x (Ruby/y.y.y)". With the silend option you can
# suppress this.
#
# "get seperator" / "post seperator": override the default /[&;]/ used to
# split the query string and the POST body into arguments.
#
# Raises TypeError if settings is not a Hash.
def initialize(settings = {})
  # {{{
  @header = {}
  @cookies = {}
  @get = {}
  @post = {}

  # Internal attributes
  @status = nil
  @reasonPhrase = nil
  @setcookies = []
  @output_started = false
  @output_allowed = false

  @mod_ruby = false
  @env = ENV.to_hash

  if defined?(MOD_RUBY)
    @output_method = "mod_ruby"
    @mod_ruby = true
  elsif @env['SERVER_SOFTWARE'] =~ /^Microsoft-IIS/i
    @output_method = "nph"
  else
    @output_method = "ph"
  end

  raise TypeError, "settings must be a Hash" unless settings.is_a?(Hash)
  @settings = settings

  @settings["safe"] = 1 unless @settings.has_key?("safe")

  # $SAFE is only comparable where it is still an Integer; elsewhere the
  # comparison itself would raise, so the level is left untouched.
  if $SAFE.is_a?(Integer) && $SAFE < @settings["safe"]
    $SAFE = @settings["safe"]
  end

  @settings["cache"] = false unless @settings.has_key?("cache")

  # mod_ruby sets no QUERY_STRING variable, if no GET-Parameters are given
  @env["QUERY_STRING"] = "" unless @env.has_key?("QUERY_STRING")

  # Now we split the QUERY_STRING by the seperators & and ; or, if
  # specified, settings['get seperator']
  get_sep = @settings.has_key?("get seperator") ? @settings['get seperator'] : /[&;]/
  parse_form_args(@env['QUERY_STRING'].split(get_sep), @get)

  if @env['REQUEST_METHOD'] == "POST"
    # Compare the media type only: a parameter such as "; charset=UTF-8"
    # does not change how the body is encoded.
    media_type = @env['CONTENT_TYPE'].to_s.split(';', 2).first.to_s.strip.downcase
    if media_type == "application/x-www-form-urlencoded" && @env.has_key?('CONTENT_LENGTH')
      post_sep = @settings.has_key?("post seperator") ? @settings['post seperator'] : /[&;]/
      # read returns nil at end of input; to_s turns that into an empty body
      body = $stdin.read(@env['CONTENT_LENGTH'].to_i).to_s
      parse_form_args(body.split(post_sep), @post)
    else
      # Maybe we should print a warning here?
      $stderr.print("Unidentified form data recived and discarded.")
    end
  end

  if @env.has_key?("HTTP_COOKIE")
    @env['HTTP_COOKIE'].split(/; ?/).each do |c|
      cookie_key, cookie_val = c.split(/=/, 2)

      @cookies[Rweb::unescape(cookie_key)] = Rweb::unescape(cookie_val)
    end
  end

  # Both are nil when the server did not supply the variable.
  @user_agent = @env['HTTP_USER_AGENT']
  @remote_addr = @env['REMOTE_ADDR']
  # }}}
end

# Stores every "key=value" argument of +args+ in the Hash +target+.
# Keys of the form name[0], name['text'] or name[] are expanded into
# nested Hashes; an empty index takes the lowest unused Integer.
def parse_form_args(args, target)
  # {{{
  # Parse names like name[0], name['text'] or name[]
  pattern = /^(.+)\[("[^\]]*"|'[^\]]*'|[0-9]*)\]$/

  args.each do |arg|
    # "a=1&&b=2" yields an empty segment that carries no key at all
    next if arg.empty?

    arg_key, arg_val = arg.split(/=/, 2)
    arg_key = Rweb::unescape(arg_key)
    arg_val = Rweb::unescape(arg_val)

    # Peel the indexes off from right to left, keeping them in order.
    keys = []
    while (match = pattern.match(arg_key))
      arg_key = match[1]
      keys.unshift(match[2])
    end
    keys.unshift(arg_key)

    akt = target
    last = nil
    lastkey = nil
    keys.each do |key|
      if key == ""
        # No key specified (like in "test[]"), so we use the
        # lowest unused Integer as key
        key = 0
        key += 1 while akt.has_key?(key)
      elsif /\A[0-9]+\z/ =~ key
        # If the index is numerical convert it to an Integer
        key = key.to_i
      elsif key[0, 1] == "'" || key[0, 1] == '"'
        # strip the surrounding quotes
        key = key[1, key.length - 2]
      end
      # create an empty Hash if there isn't already one (a scalar stored
      # earlier under the same key is replaced, so descending is safe)
      akt[key] = {} unless akt[key].is_a?(Hash)
      last = akt
      lastkey = key
      akt = akt[key]
    end
    last[lastkey] = arg_val
  end
  # }}}
end
private :parse_form_args

# Prints a String to the client. If caching is enabled, the String will be
# buffered until the end of the out block.
#
# Raises RuntimeError when called outside of an out block. Returns nil.
def print(str = "")
  # {{{
  unless @output_allowed
    raise "You just can write to output inside of a Rweb::out-block"
  end

  if @settings["cache"]
    @buffer += [str.to_s]
  else
    sendHeaders unless @output_started
    $stdout.print(str)
  end
  nil
  # }}}
end

# Prints a String to the client and adds a line break at the end. Please
# remember, that a line break is not visible in HTML, use the <br> HTML-Tag
# for this. If caching is enabled, the String will be buffered until the end
# of the out block. Non-String arguments are converted with to_s.
def puts(str = "")
  # {{{
  self.print("#{str}\n")
  # }}}
end

# Alias to print.
def write(str = "")
  # {{{
  self.print(str)
  # }}}
end

# If caching is enabled, all cached data are sent to the client and the
# cache emptied.
def flush
  # {{{
  # Writes everything collected in @buffer to $stdout and empties the
  # buffer. The HTTP headers are sent first if no output has started yet.
  #
  # Raises RuntimeError when called outside of a Rweb::out-block.
  unless @output_allowed
    raise "You can't use flush outside of a Rweb::out-block"
  end
  buffer = @buffer.join

  sendHeaders unless @output_started
  $stdout.print(buffer)

  @buffer = []
  # }}}
end

# Sends one or more headers to the client. All headers are cached until just
# before body data is sent to the client. If the same header is set
# twice, only the last value is sent.
#
# Example:
#   web.header("Last-Modified: Mon, 16 Feb 2004 20:15:41 GMT")
#   web.header("Location: http://www.ruby-lang.org")
#
# You can specify more than one header at a time by doing something like
# this:
#   web.header("Content-Type: text/plain\nContent-Length: 383")
# or
#   web.header(["Content-Type: text/plain", "Content-Length: 383"])
#
# str:: a String (one header, or several separated by newlines) or an Array
#       of such Strings. The argument itself is never modified.
#
# Header names are stored lower-cased in @header. Carriage returns, a
# trailing newline and blank lines are dropped. A line that looks like an
# HTTP status line ("HTTP/1.x NNN ..." or "Status: NNN ...") is forwarded to
# setstatus instead of being stored.
#
# Raises RuntimeError when output has already started or when called outside
# of a Rweb::out-block, and ArgumentError when str is neither a String nor
# an Array.
def header(str)
  # {{{
  if @output_started
    raise "HTTP-Headers are already send. You can't change them after output has started!"
  end
  unless @output_allowed
    raise "You just can set headers inside of a Rweb::out-block"
  end

  case str
  when Array
    str.each { |value| header(value) }
  when String
    lines = str.split(/\n/)
    if lines.length > 1
      lines.each { |value| header(value) }
    else
      # Work on a copy: the caller's string may be frozen or reused.
      line = str.delete("\r").chomp
      return if line.empty?

      result = /\AHTTP\/1\.[01] ([0-9]{3}) ?(.*)$/.match(line) ||
               /\Astatus: ([0-9]{3}) ?(.*)$/i.match(line)
      if result
        # NOTE(review): result[0] is the whole matched line and result[1] the
        # status code. setstatus is not defined in this part of the file, so
        # the original arguments are kept; confirm whether
        # (result[1], result[2]) was intended.
        setstatus(result[0], result[1])
      else
        name, value = line.split(/: ?/, 2)
        @header[name.downcase] = value
      end
    end
  else
    raise ArgumentError, "header expects a String or an Array of Strings"
  end
  # }}}
end

# Changes the status of this page. There are several codes like "200 OK",
# "302 Found", "404 Not Found" or "500 Internal Server Error".
# A list of
# all codes is available at
# http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10
#
# You can just send the code number, the reason phrase will be added
# automatically with the recommendations from the w3c if not specified. If
# you set the status twice or more, only the last status will be sent.
# Examples:
#   web.status("401 Unauthorized")
#   web.status("410 Sad but true, this lonely page is gone :(")
#   web.status(206)
#   web.status("400")
#
# The default status is "200 OK". If a "Location" header is set, the
# default status is "302 Found".
#
# str:: an Integer status code, or a String of the form "404",
#       "404 Not Found", "HTTP/1.1 404 Not Found" or "Status: 404 Not Found".
#
# Sets @status (Integer) and @reasonPhrase. When no phrase is given, also
# for an Integer argument, the phrase is looked up with getReasonPhrase.
#
# Raises RuntimeError when output has already started or when called outside
# of a Rweb::out-block, and ArgumentError for any other argument type or for
# a String that matches none of the accepted forms.
def status(str)
  # {{{
  if @output_started
    raise "HTTP-Headers are already send. You can't change them after output has started!"
  end
  unless @output_allowed
    raise "You just can set headers inside of a Rweb::out-block"
  end

  case str
  when Integer
    # Look the phrase up first, so a stale phrase from an earlier call is
    # never paired with the new code and @status stays untouched if the
    # lookup raises.
    @reasonPhrase = getReasonPhrase(str)
    @status = str
  when String
    a = /^([0-9]{3}) ?(.*)$/.match(str) ||
        /^HTTP\/1\.[01] ([0-9]{3}) ?(.*)$/.match(str) ||
        /^status: ([0-9]{3}) ?(.*)$/i.match(str)
    raise ArgumentError, "Invalid argument", caller unless a

    @status = a[1].to_i
    @reasonPhrase = a[2] != "" ? a[2] : getReasonPhrase(@status)
  else
    raise ArgumentError, "Argument of setstatus must be integer or string", caller
  end
  # }}}
end

# Handles the output of your content and rescues all exceptions. Send all
# data in the block to this method.
For example: + # web.out do + # web.header("Content-Type: text/plain") + # web.puts("Hello, plain world!") + # end + def out + # {{{ + @output_allowed = true + @buffer = []; # We use an array as buffer, because it's more performant :) + + begin + yield + rescue Exception => exception + $stderr.puts "Ruby exception rescued (#{exception.class}): #{exception.message}" + $stderr.puts exception.backtrace.join("\n") + + unless @output_started + self.setstatus(500) + @header = {} + end + + unless (@settings.has_key?("hide errors") and @settings["hide errors"] == true) + unless @output_started + self.header("Content-Type: text/html") + self.puts "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Strict//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">" + self.puts "<html>" + self.puts "<head>" + self.puts "<title>500 Internal Server Error</title>" + self.puts "</head>" + self.puts "<body>" + end + if @header.has_key?("content-type") and (@header["content-type"] =~ /^text\/html/i) == 0 + self.puts "<h1>Internal Server Error</h1>" + self.puts "<p>The server encountered an exception and was unable to complete your request.</p>" + self.puts "<p>The exception has provided the following information:</p>" + self.puts "<pre style=\"background: #FFCCCC; border: black solid 2px; margin-left: 2cm; margin-right: 2cm; padding: 2mm;\"><b>#{exception.class}</b>: #{exception.message} <b>on</b>" + self.puts + self.puts "#{exception.backtrace.join("\n")}</pre>" + self.puts "</body>" + self.puts "</html>" + else + self.puts "The server encountered an exception and was unable to complete your request" + self.puts "The exception has provided the following information:" + self.puts "#{exception.class}: #{exception.message}" + self.puts + self.puts exception.backtrace.join("\n") + end + end + end + + if @settings["cache"] + buffer = @buffer.join + + unless @output_started + unless @header.has_key?("content-length") + self.header("content-length: #{buffer.length}") + end + + sendHeaders + end + 
$stdout.print(buffer) + elsif !@output_started + sendHeaders + end + @output_allowed = false; + # }}} + end + + # Decodes URL encoded data, %20 for example stands for a space. + def Rweb.unescape(str) + # {{{ + if defined? str and str.is_a? String + str.gsub!(/\+/, " ") + str.gsub(/%.{2}/) do | s | + s[1,2].hex.chr + end + end + # }}} + end + + protected + def sendHeaders + # {{{ + + Cookie.disallow # no more cookies can be set or modified + if !(@settings.has_key?("silent") and @settings["silent"] == true) and !@header.has_key?("x-powered-by") + if @mod_ruby + header("x-powered-by: #{RWEB} (Ruby/#{RUBY_VERSION}, #{MOD_RUBY})"); + else + header("x-powered-by: #{RWEB} (Ruby/#{RUBY_VERSION})"); + end + end + + if @output_method == "ph" + if ((@status == nil or @status == 200) and !@header.has_key?("content-type") and !@header.has_key?("location")) + header("content-type: text/html") + end + + if @status != nil + $stdout.print "Status: #{@status} #{@reasonPhrase}\r\n" + end + + @header.each do |key, value| + key = key *1 # "unfreeze" key :) + key[0] = key[0,1].upcase![0] + + key = key.gsub(/-[a-z]/) do |char| + "-" + char[1,1].upcase + end + + $stdout.print "#{key}: #{value}\r\n" + end + cookies = Cookie.getHttpHeader # Get all cookies as an HTTP Header + if cookies + $stdout.print cookies + end + + $stdout.print "\r\n" + + elsif @output_method == "nph" + elsif @output_method == "mod_ruby" + r = Apache.request + + if ((@status == nil or @status == 200) and !@header.has_key?("content-type") and !@header.has_key?("location")) + header("text/html") + end + + if @status != nil + r.status_line = "#{@status} #{@reasonPhrase}" + end + + r.send_http_header + @header.each do |key, value| + key = key *1 # "unfreeze" key :) + + key[0] = key[0,1].upcase![0] + key = key.gsub(/-[a-z]/) do |char| + "-" + char[1,1].upcase + end + puts "#{key}: #{value.class}" + #r.headers_out[key] = value + end + end + @output_started = true + # }}} + end + + def getReasonPhrase (status) + # {{{ + if 
status == 100 + "Continue" + elsif status == 101 + "Switching Protocols" + elsif status == 200 + "OK" + elsif status == 201 + "Created" + elsif status == 202 + "Accepted" + elsif status == 203 + "Non-Authoritative Information" + elsif status == 204 + "No Content" + elsif status == 205 + "Reset Content" + elsif status == 206 + "Partial Content" + elsif status == 300 + "Multiple Choices" + elsif status == 301 + "Moved Permanently" + elsif status == 302 + "Found" + elsif status == 303 + "See Other" + elsif status == 304 + "Not Modified" + elsif status == 305 + "Use Proxy" + elsif status == 307 + "Temporary Redirect" + elsif status == 400 + "Bad Request" + elsif status == 401 + "Unauthorized" + elsif status == 402 + "Payment Required" + elsif status == 403 + "Forbidden" + elsif status == 404 + "Not Found" + elsif status == 405 + "Method Not Allowed" + elsif status == 406 + "Not Acceptable" + elsif status == 407 + "Proxy Authentication Required" + elsif status == 408 + "Request Time-out" + elsif status == 409 + "Conflict" + elsif status == 410 + "Gone" + elsif status == 411 + "Length Required" + elsif status == 412 + "Precondition Failed" + elsif status == 413 + "Request Entity Too Large" + elsif status == 414 + "Request-URI Too Large" + elsif status == 415 + "Unsupported Media Type" + elsif status == 416 + "Requested range not satisfiable" + elsif status == 417 + "Expectation Failed" + elsif status == 500 + "Internal Server Error" + elsif status == 501 + "Not Implemented" + elsif status == 502 + "Bad Gateway" + elsif status == 503 + "Service Unavailable" + elsif status == 504 + "Gateway Time-out" + elsif status == 505 + "HTTP Version not supported" + else + raise "Unknown Statuscode. See http://www.w3.org/Protocols/rfc2616/rfc2616-sec6.html#sec6.1 for more information." + end + # }}} + end +end + +class Cookie + attr_reader :name, :value, :maxage, :path, :domain, :secure, :comment + + # Sets a cookie. Please see below for details of the attributes. 
def initialize(name, value = nil, maxage = nil, path = nil, domain = nil, secure = false)
  # {{{
  # name::   String, required.
  # value::  String, Integer, Float or nil; numbers are stored as Strings.
  # maxage:: Integer, Time or nil; a Time is converted to the number of
  #          seconds from now.
  # path::   String or nil.
  # domain:: String or nil.
  # secure:: true or false.
  #
  # Raises RuntimeError once Cookie.disallow has been called, and TypeError
  # when an argument has the wrong type.
  #
  # HTTP headers (cookies are an HTTP header) can only be set while no
  # content has been sent. So an exception is raised when @@allowed is false
  # and a new cookie is created.
  @@allowed = true unless defined?(@@allowed)
  unless @@allowed
    raise "You can't set cookies after the HTTP headers are send."
  end

  @@type = "netscape" unless defined?(@@type)

  unless name.is_a?(String)
    raise TypeError, "The name of a cookie must be a string", caller
  end
  # is_a? instead of comparing class.superclass with Integer: since Ruby 2.4
  # Integer is the class of every integer and its superclass is Numeric.
  if value.is_a?(Integer) || value.is_a?(Float)
    value = value.to_s
  elsif !value.is_a?(String) && !value.nil?
    raise TypeError, "The value of a cookie must be a string, integer, float or nil", caller
  end
  if maxage.is_a?(Time)
    maxage = maxage - Time.now
  elsif !maxage.is_a?(Integer) && !maxage.nil?
    # The original wrote "!x == y", which parses as "(!x) == y" and never
    # rejected anything.
    raise TypeError, "The maxage date of a cookie must be an Integer or Time object or nil.", caller
  end
  unless path.is_a?(String) || path.nil?
    raise TypeError, "The path of a cookie must be nil or a string", caller
  end
  unless domain.is_a?(String) || domain.nil?
    raise TypeError, "The domain of a cookie must be nil or a string", caller
  end
  unless secure == true || secure == false
    raise TypeError, "The secure field of a cookie must be true or false", caller
  end

  @name, @value, @maxage, @path, @domain, @secure = name, value, maxage, path, domain, secure
  @comment = nil

  # Register the cookie only after every argument has been validated, so a
  # rejected cookie never ends up in the list of cookies to send.
  @@list = [] unless defined?(@@list)
  @@list += [self]
  # }}}
end

# Modifies the value of this cookie. The information you want to store. If the
# value is nil, the cookie will be deleted by the client.
#
# This attribute can be a String, Integer or Float object or nil.
def value=(value)
  # {{{
  # Integers and Floats are stored as Strings; Strings and nil are stored
  # unchanged. Anything else raises TypeError.
  #
  # is_a? is used instead of comparing class.superclass with Integer: since
  # Ruby 2.4 the superclass of Integer is Numeric, so that comparison
  # rejected every integer.
  if value.is_a?(Integer) || value.is_a?(Float)
    value = value.to_s
  elsif !value.is_a?(String) && !value.nil?
    raise TypeError, "The value of a cookie must be a string, integer, float or nil", caller
  end
  @value = value
  # }}}
end

# Modifies the maxage of this cookie. This attribute defines the lifetime of
# the cookie, in seconds. A value of 0 means the cookie should be discarded
# immediately. If it is set to nil, the cookie will be deleted when the
# browser is closed.
#
# Attention: This is different from other implementations like PHP, where you
# give the seconds since 1/1/1970 0:00:00 GMT.
#
# This attribute must be an Integer or Time object or nil. A Time is
# converted to the number of seconds from now. Anything else raises
# TypeError.
def maxage=(maxage)
  # {{{
  if maxage.is_a?(Time)
    maxage = maxage - Time.now
  elsif !maxage.is_a?(Integer) && !maxage.nil?
    # The original condition was inverted: it raised for Integers on old
    # Rubies and never raised on current ones.
    raise TypeError, "The maxage of a cookie must be an Integer or Time object or nil.", caller
  end
  @maxage = maxage
  # }}}
end

# Modifies the path value of this cookie. The client will send this cookie
# only, if the requested document is this directory or a subdirectory of it.
#
# The value of the attribute must be a String object or nil; anything else
# raises TypeError.
def path=(path)
  # {{{
  unless path.is_a?(String) || path.nil?
    raise TypeError, "The path of a cookie must be nil or a string", caller
  end
  @path = path
  # }}}
end

# Modifies the domain value of this cookie. The client will send this cookie
# only if it's connected with this domain (or a subdomain, if the first
# character is a dot like in ".ruby-lang.org")
#
# The value of this attribute must be a String or nil; anything else raises
# TypeError.
def domain=(domain)
  # {{{
  unless domain.is_a?(String) || domain.nil?
    raise TypeError, "The domain of a cookie must be a String or nil.", caller
  end
  @domain = domain
  # }}}
end

# Modifies the secure flag of this cookie.
If it's true, the client will only + # send this cookie if it is secured connected with us. + # + # The value od this attribute has to be true or false. + def secure=(secure) + # {{{ + unless secure == true || secure == false + raise TypeError, "The secure field of a cookie must be true or false", caller + end + @secure = secure + # }}} + end + + # Modifies the comment value of this cookie. The comment won't be send, if + # type is "netscape". + def comment=(comment) + # {{{ + unless comment.class == String || comment == nil + raise TypeError, "The comment of a cookie must be a string or nil", caller + end + @comment = comment + # }}} + end + + # Changes the type of all cookies. + # Allowed values are RFC2109 and netscape (default). + def Cookie.type=(type) + # {{{ + unless @@allowed + raise "The cookies are allready send, so you can't change the type anymore." + end + unless type.downcase == "rfc2109" && type.downcase == "netscape" + raise "The type of the cookies must be \"RFC2109\" or \"netscape\"." + end + @@type = type; + # }}} + end + + # After sending this message, no cookies can be set or modified. Use it, when + # HTTP-Headers are send. Rweb does this for you. + def Cookie.disallow + # {{{ + @@allowed = false + true + # }}} + end + + # Returns a HTTP header (type String) with all cookies. Rweb does this for + # you. + def Cookie.getHttpHeader + # {{{ + if defined?(@@list) + if @@type == "netscape" + str = "" + @@list.each do |cookie| + if cookie.value == nil + cookie.maxage = 0 + cookie.value = "" + end + # TODO: Name and value should be escaped! 
+ str += "Set-Cookie: #{cookie.name}=#{cookie.value}" + unless cookie.maxage == nil + expire = Time.now + cookie.maxage + expire.gmtime + str += "; Expire=#{expire.strftime("%a, %d-%b-%Y %H:%M:%S %Z")}" + end + unless cookie.domain == nil + str += "; Domain=#{cookie.domain}" + end + unless cookie.path == nil + str += "; Path=#{cookie.path}" + end + if cookie.secure + str += "; Secure" + end + str += "\r\n" + end + return str + else # type == "RFC2109" + str = "Set-Cookie: " + comma = false; + + @@list.each do |cookie| + if cookie.value == nil + cookie.maxage = 0 + cookie.value = "" + end + if comma + str += "," + end + comma = true + + str += "#{cookie.name}=\"#{cookie.value}\"" + unless cookie.maxage == nil + str += "; Max-Age=\"#{cookie.maxage}\"" + end + unless cookie.domain == nil + str += "; Domain=\"#{cookie.domain}\"" + end + unless cookie.path == nil + str += "; Path=\"#{cookie.path}\"" + end + if cookie.secure + str += "; Secure" + end + unless cookie.comment == nil + str += "; Comment=\"#{cookie.comment}\"" + end + str += "; Version=\"1\"" + end + str + end + else + false + end + # }}} + end +end + +require 'strscan' + +module BBCode + DEBUG = true + + use 'encoder', 'tags', 'tagstack', 'smileys' + +=begin + The Parser class takes care of the encoding. + It scans the given BBCode (as plain text), finds tags + and smilies and also makes links of urls in text. + + Normal text is send directly to the encoder. + + If a tag was found, an instance of a Tag subclass is created + to handle the case. + + The @tagstack manages tag nesting and ensures valid HTML. +=end + + class Parser + class Attribute + # flatten and use only one empty_arg + def self.create attr + attr = flatten attr + return @@empty_attr if attr.empty? 
+ new attr + end + + private_class_method :new + + # remove leading and trailing whitespace; concat lines + def self.flatten attr + attr.strip.gsub(/\n/, ' ') + # -> ^ and $ can only match at begin and end now + end + + ATTRIBUTE_SCAN = / + (?!$) # don't match at end + \s* + ( # $1 = key + [^=\s\]"\\]* + (?: + (?: \\. | "[^"\\]*(?:\\.[^"\\]*)*"? ) + [^=\s\]"\\]* + )* + ) + (?: + = + ( # $2 = value + [^\s\]"\\]* + (?: + (?: \\. | "[^"\\]*(?:\\.[^"\\]*)*"? ) + [^\s\]"\\]* + )* + )? + )? + \s* + /x + + def self.parse source + source = source.dup + # empty_tag: the tag looks like [... /] + # slice!: this deletes the \s*/] at the end + # \s+ because [url=http://rubybb.org/forum/] is NOT an empty tag. + # In RubyBBCode, you can use [url=http://rubybb.org/forum/ /], and this has to be + # interpreted correctly. + empty_tag = source.sub!(/^:/, '=') or source.slice!(/\/$/) + debug 'PARSE: ' + source.inspect + ' => ' + empty_tag.inspect + #-> we have now an attr that's EITHER empty OR begins and ends with non-whitespace. + + attr = Hash.new + attr[:flags] = [] + source.scan(ATTRIBUTE_SCAN) { |key, value| + if not value + attr[:flags] << unescape(key) + else + next if value.empty? and key.empty? + attr[unescape(key)] = unescape(value) + end + } + debug attr.inspect + + return empty_tag, attr + end + + def self.unescape_char esc + esc[1] + end + + def self.unquote qt + qt[1..-1].chomp('"').gsub(/\\./) { |esc| unescape_char esc } + end + + def self.unescape str + str.gsub(/ (\\.) | (" [^"\\]* (?:\\.[^"\\]*)* "?) /x) { + if $1 + unescape_char $1 + else + unquote $2 + end + } + end + + include Enumerable + def each &block + @args.each(&block) + end + + attr_reader :source, :args, :value + + def initialize source + @source = source + debug 'Attribute#new(%p)' % source + @empty_tag, @attr = Attribute.parse source + @value = @attr[''].to_s + end + + def empty? + self == @@empty_attr + end + + def empty_tag? 
+ @empty_tag + end + + def [] *keys + res = @attr[*keys] + end + + def flags + attr[:flags] + end + + def to_s + @attr + end + + def inspect + 'ATTR[' + @attr.inspect + (@empty_tag ? ' | empty tag' : '') + ']' + end + end + class Attribute + @@empty_attr = new '' + end + end + + class Parser + def Parser.flatten str + # replace mac & dos newlines with unix style + str.gsub(/\r\n?/, "\n") + end + + def initialize input = '' + # input manager + @scanner = StringScanner.new '' + # output manager + @encoder = Encoder.new + @output = '' + # tag manager + @tagstack = TagStack.new(@encoder) + + @do_magic = true + # set the input + feed input + end + + # if you want, you can feed a parser instance after creating, + # or even feed it repeatedly. + def feed food + @scanner.string = Parser.flatten food + end + + # parse through the string using parse_token + def parse + parse_token until @scanner.eos? + @tagstack.close_all + @output = parse_magic @encoder.output + end + + def output + @output + end + + # ok, internals start here + private + # the default output functions. everything should use them or the tags. + def add_text text = @scanner.matched + @encoder.add_text text + end + + # use this carefully + def add_html html + @encoder.add_html html + end + + # highlights the text as error + def add_garbage garbage + add_html '<span class="error">' if DEBUG + add_text garbage + add_html '</span>' if DEBUG + end + + # unknown and incorrectly nested tags are ignored and + # sent as plaintext (garbage in - garbage out). + # in debug mode, garbage is marked with lime background. + def garbage_out start + @scanner.pos = start + garbage = @scanner.scan(/./m) + debug 'GARBAGE: ' + garbage + add_garbage garbage + end + + # simple text; everything but [, \[ allowed + SIMPLE_TEXT_SCAN_ = / + [^\[\\]* # normal* + (?: # ( + \\.? # special + [^\[\\]* # normal* + )* # )* + /mx + SIMPLE_TEXT_SCAN = /[^\[]+/ + +=begin + + WHAT IS A TAG? 
+ ============== + + Tags in BBCode can be much more than just a simple [b]. + I use many terms here to differ the parts of each tag. + + Basic scheme: + [ code ] + TAG START TAG INFO TAG END + + Most tags need a second tag to close the range it opened. + This is done with CLOSING TAGS: + [/code] + or by using empty tags that have no content and close themselfes: + [url=winamp.com /] + You surely know this from HTML. + These slashes define the TAG KIND = normal|closing|empty and + cannot be used together. + + Everything between [ and ] and expluding the slashes is called the + TAG INFO. This info may contain: + - TAG ID + - TAG NAME including the tag id + - attributes + + The TAG ID is the first char of the info: + + TAG | ID + ----------+---- + [quote] | q + [±] | & + ["[b]"] | " + [/url] | u + [---] | - + + As you can see, the tag id shows the TAG TYPE, it can be a + normal tag, a formatting tag or an entity. + Therefor, the parser first scans the id to decide how to go + on with parsing. +=end + # tag + # TODO more complex expression allowing + # [quote="[ladico]"] and [quote=\[ladico\]] to be correct tags + TAG_BEGIN_SCAN = / + \[ # tag start + ( \/ )? # $1 = closing tag? + ( [^\]] ) # $2 = tag id + /x + TAG_END_SCAN = / + [^\]]* # rest that was not handled + \]? # tag end + /x + CLOSE_TAG_SCAN = / + ( [^\]]* ) # $1 = the rest of the tag info + ( \/ )? # $2 = empty tag? + \]? # tag end + /x + UNCLOSED_TAG_SCAN = / \[ /x + + CLASSIC_TAG_SCAN = / [a-z]* /ix + + SEPARATOR_TAG_SCAN = / \** /x + + FORMAT_TAG_SCAN = / -- -* /x + + QUOTED_SCAN = / + ( # $1 = quoted text + [^"\\]* # normal* + (?: # ( + \\. # special + [^"\\]* # normal* + )* # )* + ) + "? # end quote " + /mx + + ENTITY_SCAN = / + ( [^;\]]+ ) # $1 = entity code + ;? 
# optional ending semicolon + /ix + + SMILEY_SCAN = Smileys::SMILEY_PATTERN + + # this is the main parser loop that separates + # text - everything until "[" + # from + # tags - starting with "[", ending with "]" + def parse_token + if @scanner.scan(SIMPLE_TEXT_SCAN) + add_text + else + handle_tag + end + end + + def handle_tag + tag_start = @scanner.pos + + unless @scanner.scan TAG_BEGIN_SCAN + garbage_out tag_start + return + end + + closing, id = @scanner[1], @scanner[2] + #debug 'handle_tag(%p)' % @scanner.matched + + handled = + case id + + when /[a-z]/i + if @scanner.scan(CLASSIC_TAG_SCAN) + if handle_classic_tag(id + @scanner.matched, closing) + already_closed = true + end + end + + when '*' + if @scanner.scan(SEPARATOR_TAG_SCAN) + handle_asterisk tag_start, id + @scanner.matched + true + end + + when '-' + if @scanner.scan(FORMAT_TAG_SCAN) + #format = id + @scanner.matched + @encoder.add_html "\n<hr>\n" + true + end + + when '"' + if @scanner.scan(QUOTED_SCAN) + @encoder.add_text unescape(@scanner[1]) + true + end + + when '&' + if @scanner.scan(ENTITY_SCAN) + @encoder.add_entity @scanner[1] + true + end + + when Smileys::SMILEY_START_CHARSET + @scanner.pos = @scanner.pos - 1 # (ungetch) + if @scanner.scan(SMILEY_SCAN) + @encoder.add_html Smileys.smiley_to_image(@scanner.matched) + true + end + + end # case + + return garbage_out(tag_start) unless handled + + @scanner.scan(TAG_END_SCAN) unless already_closed + end + + ATTRIBUTES_SCAN = / + ( + [^\]"\\]* + (?: + (?: + \\. + | + " + [^"\\]* + (?: + \\. + [^"\\]* + )* + "? + ) + [^\]"\\]* + )* + ) + \]? + /x + + def handle_classic_tag name, closing + debug 'TAG: ' + (closing ? '/' : '') + name + # flatten + name.downcase! + tag_class = TAG_LIST[name] + return unless tag_class + + #debug((opening ? 
'OPEN ' : 'CLOSE ') + tag_class.name) + + # create an attribute object to handle it + @scanner.scan(ATTRIBUTES_SCAN) + #debug name + ':' + @scanner[1] + attr = Attribute.create @scanner[1] + #debug 'ATTRIBUTES %p ' % attr #unless attr.empty? + + #debug 'closing: %p; name=%s, attr=%p' % [closing, name, attr] + + # OPEN + if not closing and tag = @tagstack.try_open_class(tag_class, attr) + #debug 'opening' + tag.do_open @scanner + # this should be done by the tag itself. + if attr.empty_tag? + tag.handle_empty + @tagstack.close_tag + elsif tag.special_content? + handle_special_content(tag) + @tagstack.close_tag + # # ignore asterisks directly after the opening; these are phpBBCode + # elsif tag.respond_to? :asterisk + # debug 'SKIP ASTERISKS: ' if @scanner.skip(ASTERISK_TAGS_SCAN) + end + + # CLOSE + elsif @tagstack.try_close_class(tag_class) + #debug 'closing' + # GARBAGE + else + return + end + + true + end + + def handle_asterisk tag_start, stars + #debug 'ASTERISK: ' + stars.to_s + # rule for asterisk tags: they belong to the last tag + # that handles them. tags opened after this tag are closed. + # if no open tag uses them, all are closed. + tag = @tagstack.close_all_until { |tag| tag.respond_to? 
:asterisk } + unless tag and tag.asterisk stars, @scanner + garbage_out tag_start + end + end + + def handle_special_content tag + scanned = @scanner.scan_until(tag.closing_tag) + if scanned + scanned.slice!(-(@scanner.matched.size)..-1) + else + scanned = @scanner.scan(/.*/m).to_s + end + #debug 'SPECIAL CONTENT: ' + scanned + tag.handle_content(scanned) + end + + def unescape text + # input: correctly formatted quoted string (without the quotes) + text.gsub(/\\(?:(["\\])|.)/) { $1 or $& } + end + + + # MAGIC FEAUTURES + + URL_PATTERN = /(?:(?:www|ftp)\.|(?>\w{3,}):\/\/)\S+/ + EMAIL_PATTERN = /(?>[\w\-_.]+)@[\w\-\.]+\.\w+/ + + HAS_MAGIC = /[&@#{Smileys::SMILEY_START_CHARS}]|(?i:www|ftp)/ + + MAGIC_PATTERN = Regexp.new('(\W|^)(%s)' % + [Smileys::MAGIC_SMILEY_PATTERN, URL_PATTERN, EMAIL_PATTERN].map { |pattern| + pattern.to_s + }.join('|') ) + + IS_SMILEY_PATTERN = Regexp.new('^%s' % Smileys::SMILEY_START_CHARSET.to_s ) + IS_URL_PATTERN = /^(?:(?i:www|ftp)\.|(?>\w+):\/\/)/ + URL_STARTS_WITH_PROTOCOL = /^\w+:\/\// + IS_EMAIL_PATTERN = /^[\w\-_.]+@/ + + def to_magic text + # debug MAGIC_PATTERN.to_s + text.gsub!(MAGIC_PATTERN) { + magic = $2 + $1 + case magic + when IS_SMILEY_PATTERN + Smileys.smiley_to_img magic + when IS_URL_PATTERN + last = magic.slice_punctation! # no punctation in my URL + href = magic + href.insert(0, 'http://') unless magic =~ URL_STARTS_WITH_PROTOCOL + '<a href="' + href + '">' + magic + '</a>' + last + when IS_EMAIL_PATTERN + last = magic.slice_punctation! + '<a href="mailto:' + magic + '">' + magic + '</a>' + last + else + raise '{{{' + magic + '}}}' + end + } + text + end + + # handles smileys and urls + def parse_magic html + return html unless @do_magic + scanner = StringScanner.new html + out = '' + while scanner.rest? + if scanner.scan(/ < (?: a\s .*? <\/a> | pre\W .*? 
<\/pre> | [^>]* > ) /mx) + out << scanner.matched + elsif scanner.scan(/ [^<]+ /x) + out << to_magic(scanner.matched) + + # this should never happen + elsif scanner.scan(/./m) + raise 'ERROR: else case reached' + end + end + out + end + end # Parser +end + +class String + def slice_punctation! + slice!(/[.:,!\?]+$/).to_s # return '' instead of nil + end +end + +# +# = Grammar +# +# An implementation of common algorithms on grammars. +# +# This is used by Shinobu, a visualization tool for educating compiler-building. +# +# Thanks to Andreas Kunert for his wonderful LR(k) Pamphlet (German, see http://www.informatik.hu-berlin.de/~kunert/papers/lr-analyse), and Aho/Sethi/Ullman for their Dragon Book. +# +# Homepage:: http://shinobu.cYcnus.de (not existing yet) +# Author:: murphy (Kornelius Kalnbach) +# Copyright:: (cc) 2005 cYcnus +# License:: GPL +# Version:: 0.2.0 (2005-03-27) + +require 'set_hash' +require 'ctype' +require 'tools' +require 'rules' +require 'trace' + +require 'first' +require 'follow' + +# = Grammar +# +# == Syntax +# +# === Rules +# +# Each line is a rule. +# The syntax is +# +# left - right +# +# where +left+ and +right+ can be uppercase and lowercase letters, +# and <code>-</code> can be any combination of <, >, - or whitespace. +# +# === Symbols +# +# Uppercase letters stand for meta symbols, lowercase for terminals. +# +# You can make epsilon-derivations by leaving <code><right></code> empty. +# +# === Example +# S - Ac +# A - Sc +# A - b +# A - +class Grammar + + attr_reader :tracer + # Creates a new Grammar. + # If $trace is true, the algorithms explain (textual) what they do to $stdout. 
+ def initialize data, tracer = Tracer.new + @tracer = tracer + @rules = Rules.new + @terminals, @meta_symbols = SortedSet.new, Array.new + @start_symbol = nil + add_rules data + end + + attr_reader :meta_symbols, :terminals, :rules, :start_symbol + + alias_method :sigma, :terminals + alias_method :alphabet, :terminals + alias_method :variables, :meta_symbols + alias_method :nonterminals, :meta_symbols + + # A string representation of the grammar for debugging. + def inspect productions_too = false + 'Grammar(meta symbols: %s; alphabet: %s; productions: [%s]; start symbol: %s)' % + [ + meta_symbols.join(', '), + terminals.join(', '), + if productions_too + @rules.inspect + else + @rules.size + end, + start_symbol + ] + end + + # Add rules to the grammar. +rules+ should be a String or respond to +scan+ in a similar way. + # + # Syntax: see Grammar. + def add_rules grammar + @rules = Rules.parse grammar do |rule| + @start_symbol ||= rule.left + @meta_symbols << rule.left + @terminals.merge rule.right.split('').select { |s| terminal? s } + end + @meta_symbols.uniq! + update + end + + # Returns a hash acting as FIRST operator, so that + # <code>first["ABC"]</code> is FIRST(ABC). + # See http://en.wikipedia.org/wiki/LL_parser "Constructing an LL(1) parsing table" for details. + def first + first_operator + end + + # Returns a hash acting as FOLLOW operator, so that + # <code>first["A"]</code> is FOLLOW(A). + # See http://en.wikipedia.org/wiki/LL_parser "Constructing an LL(1) parsing table" for details. + def follow + follow_operator + end + + LLError = Class.new(Exception) + LLErrorType1 = Class.new(LLError) + LLErrorType2 = Class.new(LLError) + + # Tests if the grammar is LL(1). + def ll1? + begin + for meta in @meta_symbols + first_sets = @rules[meta].map { |alpha| first[alpha] } + first_sets.inject(Set[]) do |already_used, another_first_set| + unless already_used.disjoint? 
another_first_set + raise LLErrorType1 + end + already_used.merge another_first_set + end + + if first[meta].include? EPSILON and not first[meta].disjoint? follow[meta] + raise LLErrorType2 + end + end + rescue LLError + false + else + true + end + end + +private + + def first_operator + @first ||= FirstOperator.new self + end + + def follow_operator + @follow ||= FollowOperator.new self + end + + def update + @first = @follow = nil + end + +end + +if $0 == __FILE__ + eval DATA.read, nil, $0, __LINE__+4 +end + +require 'test/unit' + +class TestCaseGrammar < Test::Unit::TestCase + + include Grammar::Symbols + + def fifo s + Set[*s.split('')] + end + + def test_fifo + assert_equal Set[], fifo('') + assert_equal Set[EPSILON, END_OF_INPUT, 'x', 'Y'], fifo('?xY$') + end + + TEST_GRAMMAR_1 = <<-EOG +S - ABCD +A - a +A - +B - b +B - +C - c +C - +D - S +D - + EOG + + def test_symbols + assert EPSILON + assert END_OF_INPUT + end + + def test_first_1 + g = Grammar.new TEST_GRAMMAR_1 + + f = nil + assert_nothing_raised { f = g.first } + assert_equal(Set['a', EPSILON], f['A']) + assert_equal(Set['b', EPSILON], f['B']) + assert_equal(Set['c', EPSILON], f['C']) + assert_equal(Set['a', 'b', 'c', EPSILON], f['D']) + assert_equal(f['D'], f['S']) + end + + def test_follow_1 + g = Grammar.new TEST_GRAMMAR_1 + + f = nil + assert_nothing_raised { f = g.follow } + assert_equal(Set['a', 'b', 'c', END_OF_INPUT], f['A']) + assert_equal(Set['a', 'b', 'c', END_OF_INPUT], f['B']) + assert_equal(Set['a', 'b', 'c', END_OF_INPUT], f['C']) + assert_equal(Set[END_OF_INPUT], f['D']) + assert_equal(Set[END_OF_INPUT], f['S']) + end + + + TEST_GRAMMAR_2 = <<-EOG +S - Ed +E - EpT +E - EmT +E - T +T - TuF +T - TdF +T - F +F - i +F - n +F - aEz + EOG + + def test_first_2 + g = Grammar.new TEST_GRAMMAR_2 + + f = nil + assert_nothing_raised { f = g.first } + assert_equal(Set['a', 'n', 'i'], f['E']) + assert_equal(Set['a', 'n', 'i'], f['F']) + assert_equal(Set['a', 'n', 'i'], f['T']) + assert_equal(Set['a', 
'n', 'i'], f['S']) + end + + def test_follow_2 + g = Grammar.new TEST_GRAMMAR_2 + + f = nil + assert_nothing_raised { f = g.follow } + assert_equal(Set['m', 'd', 'z', 'p'], f['E']) + assert_equal(Set['m', 'd', 'z', 'p', 'u'], f['F']) + assert_equal(Set['m', 'd', 'z', 'p', 'u'], f['T']) + assert_equal(Set[END_OF_INPUT], f['S']) + end + + LLError = Grammar::LLError + + TEST_GRAMMAR_3 = <<-EOG +E - TD +D - pTD +D - +T - FS +S - uFS +S - +S - p +F - aEz +F - i + EOG + + NoError = Class.new(Exception) + + def test_first_3 + g = Grammar.new TEST_GRAMMAR_3 + + # Grammar 3 is LL(1), so all first-sets must be disjoint. + f = nil + assert_nothing_raised { f = g.first } + assert_equal(Set['a', 'i'], f['E']) + assert_equal(Set[EPSILON, 'p'], f['D']) + assert_equal(Set['a', 'i'], f['F']) + assert_equal(Set['a', 'i'], f['T']) + assert_equal(Set[EPSILON, 'u', 'p'], f['S']) + for m in g.meta_symbols + r = g.rules[m] + firsts = r.map { |x| f[x] }.to_set + assert_nothing_raised do + firsts.inject(Set.new) do |already_used, another_first_set| + raise LLError, 'not disjoint!' unless already_used.disjoint? another_first_set + already_used.merge another_first_set + end + end + end + end + + def test_follow_3 + g = Grammar.new TEST_GRAMMAR_3 + + # Grammar 3 is not LL(1), because epsilon is in FIRST(S), + # but FIRST(S) and FOLLOW(S) are not disjoint. + f = nil + assert_nothing_raised { f = g.follow } + assert_equal(Set['z', END_OF_INPUT], f['E']) + assert_equal(Set['z', END_OF_INPUT], f['D']) + assert_equal(Set['z', 'p', 'u', END_OF_INPUT], f['F']) + assert_equal(Set['p', 'z', END_OF_INPUT], f['T']) + assert_equal(Set['p', 'z', END_OF_INPUT], f['S']) + for m in g.meta_symbols + first_m = g.first[m] + next unless first_m.include? EPSILON + assert_raise(m == 'S' ? LLError : NoError) do + if first_m.disjoint? 
f[m] + raise NoError # this is fun :D + else + raise LLError + end + end + end + end + + TEST_GRAMMAR_3b = <<-EOG +E - TD +D - pTD +D - PTD +D - +T - FS +S - uFS +S - +F - aEz +F - i +P - p + EOG + + def test_first_3b + g = Grammar.new TEST_GRAMMAR_3b + + # Grammar 3b is NOT LL(1), since not all first-sets are disjoint. + f = nil + assert_nothing_raised { f = g.first } + assert_equal(Set['a', 'i'], f['E']) + assert_equal(Set[EPSILON, 'p'], f['D']) + assert_equal(Set['p'], f['P']) + assert_equal(Set['a', 'i'], f['F']) + assert_equal(Set['a', 'i'], f['T']) + assert_equal(Set[EPSILON, 'u'], f['S']) + for m in g.meta_symbols + r = g.rules[m] + firsts = r.map { |x| f[x] } + assert_raise(m == 'D' ? LLError : NoError) do + firsts.inject(Set.new) do |already_used, another_first_set| + raise LLError, 'not disjoint!' unless already_used.disjoint? another_first_set + already_used.merge another_first_set + end + raise NoError + end + end + end + + def test_follow_3b + g = Grammar.new TEST_GRAMMAR_3b + + # Although Grammar 3b is NOT LL(1), the FOLLOW-condition is satisfied. + f = nil + assert_nothing_raised { f = g.follow } + assert_equal(fifo('z$'), f['E'], 'E') + assert_equal(fifo('z$'), f['D'], 'D') + assert_equal(fifo('ai'), f['P'], 'P') + assert_equal(fifo('z$pu'), f['F'], 'F') + assert_equal(fifo('z$p'), f['T'], 'T') + assert_equal(fifo('z$p'), f['S'], 'S') + for m in g.meta_symbols + first_m = g.first[m] + next unless first_m.include? EPSILON + assert_raise(NoError) do + if first_m.disjoint? f[m] + raise NoError # this is fun :D + else + raise LLError + end + end + end + end + + def test_ll1? 
+ assert_equal false, Grammar.new(TEST_GRAMMAR_3).ll1?, 'Grammar 3' + assert_equal false, Grammar.new(TEST_GRAMMAR_3b).ll1?, 'Grammar 3b' + end + + def test_new + assert_nothing_raised { Grammar.new '' } + assert_nothing_raised { Grammar.new TEST_GRAMMAR_1 } + assert_nothing_raised { Grammar.new TEST_GRAMMAR_2 } + assert_nothing_raised { Grammar.new TEST_GRAMMAR_3 } + assert_nothing_raised { Grammar.new TEST_GRAMMAR_1 + TEST_GRAMMAR_2 + TEST_GRAMMAR_3 } + assert_raise(ArgumentError) { Grammar.new 'S - ?' } + end +end + +# vim:foldmethod=syntax + +#!/usr/bin/env ruby + +require 'fox12' + +include Fox + +class Window < FXMainWindow + def initialize(app) + super(app, app.appName + ": First Set Calculation", nil, nil, DECOR_ALL, 0, 0, 800, 600, 0, 0) + + # {{{ menubar + menubar = FXMenuBar.new(self, LAYOUT_SIDE_TOP|LAYOUT_FILL_X) + + filemenu = FXMenuPane.new(self) + + FXMenuCommand.new(filemenu, "&Start\tCtl-S\tStart the application.", nil, getApp()).connect(SEL_COMMAND, method(:start)) + FXMenuCommand.new(filemenu, "&Quit\tAlt-F4\tQuit the application.", nil, getApp(), FXApp::ID_QUIT) + FXMenuTitle.new(menubar, "&File", nil, filemenu) + # }}} menubar + + # {{{ statusbar + @statusbar = FXStatusBar.new(self, LAYOUT_SIDE_BOTTOM|LAYOUT_FILL_X|STATUSBAR_WITH_DRAGCORNER) + # }}} statusbar + + # {{{ window content + horizontalsplitt = FXSplitter.new(self, SPLITTER_VERTICAL|LAYOUT_SIDE_TOP|LAYOUT_FILL) + + + @productions = FXList.new(horizontalsplitt, nil, 0, LAYOUT_SIDE_TOP|LAYOUT_FILL_X|LAYOUT_FIX_HEIGHT|LIST_SINGLESELECT) + @productions.height = 100 + + @result = FXTable.new(horizontalsplitt, nil, 0, LAYOUT_FILL) + @result.height = 200 + @result.setTableSize(2, 2, false) + @result.rowHeaderWidth = 0 + + header = @result.columnHeader + header.setItemText 0, 'X' + header.setItemText 1, 'FIRST(X)' + for item in header + item.justification = FXHeaderItem::CENTER_X + end + + @debug = FXText.new(horizontalsplitt, nil, 0, LAYOUT_SIDE_BOTTOM|LAYOUT_FILL_X|LAYOUT_FIX_HEIGHT) + 
@debug.height = 200 + + # }}} window content + end + + def load_grammar grammar + @tracer = FirstTracer.new(self) + @grammar = Grammar.new grammar, @tracer + @rules_indexes = Hash.new + @grammar.rules.each_with_index do |rule, i| + @productions.appendItem rule.inspect + @rules_indexes[rule] = i + end + end + + def create + super + show(PLACEMENT_SCREEN) + end + + def rule rule + @productions.selectItem @rules_indexes[rule] + sleep 0.1 + end + + def iterate i + setTitle i.to_s + sleep 0.1 + end + + def missing what + @debug.appendText what + "\n" + sleep 0.1 + end + + def start sender, sel, pointer + Thread.new do + begin + @grammar.first + rescue => boom + @debug.appendText [boom.to_s, *boom.backtrace].join("\n") + end + end + end + +end + +$: << 'grammar' +require 'grammar' + +require 'first_tracer' + +app = FXApp.new("Shinobu", "cYcnus") + +# fenster erzeugen +window = Window.new app + +unless ARGV.empty? + grammar = File.read(ARGV.first) +else + grammar = <<-EOG1 +Z --> S +S --> Sb +S --> bAa +A --> aSc +A --> a +A --> aSb + EOG1 +end + +window.load_grammar grammar + +app.create +app.run + +require 'erb' +require 'ftools' +require 'yaml' +require 'redcloth' + +module WhyTheLuckyStiff + class Book + attr_accessor :author, :title, :terms, :image, :teaser, + :chapters, :expansion_paks, :encoding, :credits + def [] x + @lang.fetch(x) do + warn warning = "[not translated: '#{x}'!]" + warning + end + end + end + + def Book::load( file_name ) + YAML::load( File.open( file_name ) ) + end + + class Section + attr_accessor :index, :header, :content + def initialize( i, h, c ) + @index, @header, @content = i, h, RedCloth::new( c.to_s ) + end + end + + class Sidebar + attr_accessor :title, :content + end + + YAML::add_domain_type( 'whytheluckystiff.net,2003', 'sidebar' ) do |taguri, val| + YAML::object_maker( Sidebar, 'title' => val.keys.first, 'content' => RedCloth::new( val.values.first ) ) + end + class Chapter + attr_accessor :index, :title, :sections + def initialize( 
i, t, sects ) + @index = i + @title = t + i = 0 + @sections = sects.collect do |s| + if s.respond_to?( :keys ) + i += 1 + Section.new( i, s.keys.first, s.values.first ) + else + s + end + end + end + end + + YAML::add_domain_type( 'whytheluckystiff.net,2003', 'book' ) do |taguri, val| + ['chapters', 'expansion_paks'].each do |chaptype| + i = 0 + val[chaptype].collect! do |c| + i += 1 + Chapter::new( i, c.keys.first, c.values.first ) + end + end + val['teaser'].collect! do |t| + Section::new( 1, t.keys.first, t.values.first ) + end + val['terms'] = RedCloth::new( val['terms'] ) + YAML::object_maker( Book, val ) + end + + class Image + attr_accessor :file_name + end + + YAML::add_domain_type( 'whytheluckystiff.net,2003', 'img' ) do |taguri, val| + YAML::object_maker( Image, 'file_name' => "i/" + val ) + end +end + +# +# Convert the book to HTML +# +if __FILE__ == $0 + unless ARGV[0] + puts "Usage: #{$0} [/path/to/save/html]" + exit + end + + site_path = ARGV[0] + book = WhyTheLuckyStiff::Book::load( 'poignant.yml' ) + chapter = nil + + # Write index page + index_tpl = ERB::new( File.open( 'index.erb' ).read ) + File.open( File.join( site_path, 'index.html' ), 'w' ) do |out| + out << index_tpl.result + end + + book.chapters = book.chapters[0,3] if ARGV.include? '-fast' + + # Write chapter pages + chapter_tpl = ERB::new( File.open( 'chapter.erb' ).read ) + book.chapters.each do |chapter| + File.open( File.join( site_path, "chapter-#{ chapter.index }.html" ), 'w' ) do |out| + out << chapter_tpl.result + end + end + exit if ARGV.include? 
'-fast' + + # Write expansion pak pages + expak_tpl = ERB::new( File.open( 'expansion-pak.erb' ).read ) + book.expansion_paks.each do |pak| + File.open( File.join( site_path, "expansion-pak-#{ pak.index }.html" ), 'w' ) do |out| + out << expak_tpl.result( binding ) + end + end + + # Write printable version + print_tpl = ERB::new( File.open( 'print.erb' ).read ) + File.open( File.join( site_path, "print.html" ), 'w' ) do |out| + out << print_tpl.result + end + + # Copy css + images into site + copy_list = ["guide.css"] + + Dir["i/*"].find_all { |image| image =~ /\.(gif|jpg|png)$/ } + + File.makedirs( File.join( site_path, "i" ) ) + copy_list.each do |copy_file| + File.copy( copy_file, File.join( site_path, copy_file ) ) + end +end + +#!/usr/bin/env ruby + +require 'fox' +begin + require 'opengl' +rescue LoadError + require 'fox/missingdep' + MSG = <<EOM + Sorry, this example depends on the OpenGL extension. Please + check the Ruby Application Archives for an appropriate + download site. +EOM + missingDependency(MSG) +end + + +include Fox +include Math + +Deg2Rad = Math::PI / 180 + +D_MAX = 6 +SQUARE_SIZE = 2.0 / D_MAX +SQUARE_DISTANCE = 4.0 / D_MAX +AMPLITUDE = SQUARE_SIZE +LAMBDA = D_MAX.to_f / 2 + +class GLTestWindow < FXMainWindow + + # How often our timer will fire (in milliseconds) + TIMER_INTERVAL = 500 + + # Rotate the boxes when a timer message is received + def onTimeout(sender, sel, ptr) + @angle += 10.0 +# @size = 0.5 + 0.2 * Math.cos(Deg2Rad * @angle) + drawScene() + @timer = getApp().addTimeout(TIMER_INTERVAL, method(:onTimeout)) + end + + # Rotate the boxes when a chore message is received + def onChore(sender, sel, ptr) + @angle += 10.0 +# @angle %= 360.0 +# @size = 0.5 + 0.2 * Math.cos(Deg2Rad * @angle) + drawScene() + @chore = getApp().addChore(method(:onChore)) + end + + # Draw the GL scene + def drawScene + lightPosition = [15.0, 10.0, 5.0, 1.0] + lightAmbient = [ 0.1, 0.1, 0.1, 1.0] + lightDiffuse = [ 0.9, 0.9, 0.9, 1.0] + redMaterial = [ 0.0, 
0.0, 1.0, 1.0] + blueMaterial = [ 0.0, 1.0, 0.0, 1.0] + + width = @glcanvas.width.to_f + height = @glcanvas.height.to_f + aspect = width/height + + # Make context current + @glcanvas.makeCurrent() + + GL.Viewport(0, 0, @glcanvas.width, @glcanvas.height) + + GL.ClearColor(1.0/256, 0.0, 5.0/256, 1.0) + GL.Clear(GL::COLOR_BUFFER_BIT|GL::DEPTH_BUFFER_BIT) + GL.Enable(GL::DEPTH_TEST) + + GL.Disable(GL::DITHER) + + GL.MatrixMode(GL::PROJECTION) + GL.LoadIdentity() + GLU.Perspective(30.0, aspect, 1.0, 100.0) + + GL.MatrixMode(GL::MODELVIEW) + GL.LoadIdentity() + GLU.LookAt(5.0, 10.0, 15.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0) + + GL.ShadeModel(GL::SMOOTH) + GL.Light(GL::LIGHT0, GL::POSITION, lightPosition) + GL.Light(GL::LIGHT0, GL::AMBIENT, lightAmbient) + GL.Light(GL::LIGHT0, GL::DIFFUSE, lightDiffuse) + GL.Enable(GL::LIGHT0) + GL.Enable(GL::LIGHTING) + + GL.Rotated(0.1*@angle, 0.0, 1.0, 0.0) + for x in -D_MAX..D_MAX + for y in -D_MAX..D_MAX + h1 = (x + y - 2).abs + h2 = (y - x + 1).abs + GL.PushMatrix + c = [1, 0, 0, 1] + GL.Material(GL::FRONT, GL::AMBIENT, c) + GL.Material(GL::FRONT, GL::DIFFUSE, c) + + GL.Translated( + y * SQUARE_DISTANCE, + AMPLITUDE * h1, + x * SQUARE_DISTANCE + ) + + GL.Begin(GL::TRIANGLE_STRIP) + GL.Normal(1.0, 0.0, 0.0) + GL.Vertex(-SQUARE_SIZE, +SQUARE_SIZE, -SQUARE_SIZE) + GL.Vertex(-SQUARE_SIZE, +SQUARE_SIZE, +SQUARE_SIZE) + GL.Vertex(+SQUARE_SIZE, +SQUARE_SIZE, -SQUARE_SIZE) + GL.Vertex(+SQUARE_SIZE, +SQUARE_SIZE, +SQUARE_SIZE) + GL.End + + GL.PopMatrix + + GL.PushMatrix + c = [0, 0, 1, 1] + GL.Material(GL::FRONT, GL::AMBIENT, c) + GL.Material(GL::FRONT, GL::DIFFUSE, c) + + GL.Translated( + y * SQUARE_DISTANCE, + AMPLITUDE * h2, + x * SQUARE_DISTANCE + ) + + GL.Begin(GL::TRIANGLE_STRIP) + GL.Normal(1.0, 0.0, 0.0) + GL.Vertex(-SQUARE_SIZE, +SQUARE_SIZE, -SQUARE_SIZE) + GL.Vertex(-SQUARE_SIZE, +SQUARE_SIZE, +SQUARE_SIZE) + GL.Vertex(+SQUARE_SIZE, +SQUARE_SIZE, -SQUARE_SIZE) + GL.Vertex(+SQUARE_SIZE, +SQUARE_SIZE, +SQUARE_SIZE) + GL.End + + 
GL.PopMatrix + + GL.PushMatrix + c = [0.0 + (x/10.0), 0.0 + (y/10.0), 0, 1] + GL.Material(GL::FRONT, GL::AMBIENT, c) + GL.Material(GL::FRONT, GL::DIFFUSE, c) + + GL.Translated( + y * SQUARE_DISTANCE, + 0, + x * SQUARE_DISTANCE + ) + + GL.Begin(GL::TRIANGLE_STRIP) + GL.Normal(1.0, 0.0, 0.0) + GL.Vertex(-SQUARE_SIZE, +SQUARE_SIZE, -SQUARE_SIZE) + GL.Vertex(-SQUARE_SIZE, +SQUARE_SIZE, +SQUARE_SIZE) + GL.Vertex(+SQUARE_SIZE, +SQUARE_SIZE, -SQUARE_SIZE) + GL.Vertex(+SQUARE_SIZE, +SQUARE_SIZE, +SQUARE_SIZE) + GL.End + + GL.PopMatrix + end + end + + # Swap if it is double-buffered + if @glvisual.isDoubleBuffer + @glcanvas.swapBuffers + end + + # Make context non-current + @glcanvas.makeNonCurrent + end + + def initialize(app) + # Invoke the base class initializer + super(app, "OpenGL Test Application", nil, nil, DECOR_ALL, 0, 0, 1024, 768) + + # Construct the main window elements + frame = FXHorizontalFrame.new(self, LAYOUT_SIDE_TOP|LAYOUT_FILL_X|LAYOUT_FILL_Y) + frame.padLeft, frame.padRight = 0, 0 + frame.padTop, frame.padBottom = 0, 0 + + # Left pane to contain the glcanvas + glcanvasFrame = FXVerticalFrame.new(frame, + LAYOUT_FILL_X|LAYOUT_FILL_Y|LAYOUT_TOP|LAYOUT_LEFT) + glcanvasFrame.padLeft, glcanvasFrame.padRight = 10, 10 + glcanvasFrame.padTop, glcanvasFrame.padBottom = 10, 10 + + # Label above the glcanvas + FXLabel.new(glcanvasFrame, "OpenGL Canvas Frame", nil, + JUSTIFY_CENTER_X|LAYOUT_FILL_X) + + # Horizontal divider line + FXHorizontalSeparator.new(glcanvasFrame, SEPARATOR_GROOVE|LAYOUT_FILL_X) + + # Drawing glcanvas + glpanel = FXVerticalFrame.new(glcanvasFrame, (FRAME_SUNKEN|FRAME_THICK| + LAYOUT_FILL_X|LAYOUT_FILL_Y|LAYOUT_TOP|LAYOUT_LEFT)) + glpanel.padLeft, glpanel.padRight = 0, 0 + glpanel.padTop, glpanel.padBottom = 0, 0 + + # A visual to draw OpenGL + @glvisual = FXGLVisual.new(getApp(), VISUAL_DOUBLEBUFFER) + + # Drawing glcanvas + @glcanvas = FXGLCanvas.new(glpanel, @glvisual, nil, 0, + LAYOUT_FILL_X|LAYOUT_FILL_Y|LAYOUT_TOP|LAYOUT_LEFT) + 
@glcanvas.connect(SEL_PAINT) { + drawScene + } + @glcanvas.connect(SEL_CONFIGURE) { + if @glcanvas.makeCurrent + GL.Viewport(0, 0, @glcanvas.width, @glcanvas.height) + @glcanvas.makeNonCurrent + end + } + + # Right pane for the buttons + buttonFrame = FXVerticalFrame.new(frame, LAYOUT_FILL_Y|LAYOUT_TOP|LAYOUT_LEFT) + buttonFrame.padLeft, buttonFrame.padRight = 10, 10 + buttonFrame.padTop, buttonFrame.padBottom = 10, 10 + + # Label above the buttons + FXLabel.new(buttonFrame, "Button Frame", nil, + JUSTIFY_CENTER_X|LAYOUT_FILL_X) + + # Horizontal divider line + FXHorizontalSeparator.new(buttonFrame, SEPARATOR_RIDGE|LAYOUT_FILL_X) + + # Spin according to timer + spinTimerBtn = FXButton.new(buttonFrame, + "Spin &Timer\tSpin using interval timers\nNote the app + blocks until the interal has elapsed...", nil, + nil, 0, FRAME_THICK|FRAME_RAISED|LAYOUT_FILL_X|LAYOUT_TOP|LAYOUT_LEFT) + spinTimerBtn.padLeft, spinTimerBtn.padRight = 10, 10 + spinTimerBtn.padTop, spinTimerBtn.padBottom = 5, 5 + spinTimerBtn.connect(SEL_COMMAND) { + @spinning = true + @timer = getApp().addTimeout(TIMER_INTERVAL, method(:onTimeout)) + } + spinTimerBtn.connect(SEL_UPDATE) { |sender, sel, ptr| + @spinning ? sender.disable : sender.enable + } + + # Spin according to chore + spinChoreBtn = FXButton.new(buttonFrame, + "Spin &Chore\tSpin as fast as possible using chores\nNote even though the + app is very responsive, it never blocks;\nthere is always something to + do...", nil, + nil, 0, FRAME_THICK|FRAME_RAISED|LAYOUT_FILL_X|LAYOUT_TOP|LAYOUT_LEFT) + spinChoreBtn.padLeft, spinChoreBtn.padRight = 10, 10 + spinChoreBtn.padTop, spinChoreBtn.padBottom = 5, 5 + spinChoreBtn.connect(SEL_COMMAND) { + @spinning = true + @chore = getApp().addChore(method(:onChore)) + } + spinChoreBtn.connect(SEL_UPDATE) { |sender, sel, ptr| + @spinning ? 
sender.disable : sender.enable + } + + # Stop spinning + stopBtn = FXButton.new(buttonFrame, + "&Stop Spin\tStop this mad spinning, I'm getting dizzy", nil, + nil, 0, FRAME_THICK|FRAME_RAISED|LAYOUT_FILL_X|LAYOUT_TOP|LAYOUT_LEFT) + stopBtn.padLeft, stopBtn.padRight = 10, 10 + stopBtn.padTop, stopBtn.padBottom = 5, 5 + stopBtn.connect(SEL_COMMAND) { + @spinning = false + if @timer + getApp().removeTimeout(@timer) + @timer = nil + end + if @chore + getApp().removeChore(@chore) + @chore = nil + end + } + stopBtn.connect(SEL_UPDATE) { |sender, sel, ptr| + @spinning ? sender.enable : sender.disable + } + + # Exit button + exitBtn = FXButton.new(buttonFrame, "&Exit\tExit the application", nil, + getApp(), FXApp::ID_QUIT, + FRAME_THICK|FRAME_RAISED|LAYOUT_FILL_X|LAYOUT_TOP|LAYOUT_LEFT) + exitBtn.padLeft, exitBtn.padRight = 10, 10 + exitBtn.padTop, exitBtn.padBottom = 5, 5 + + # Make a tooltip + FXTooltip.new(getApp()) + + # Initialize private variables + @spinning = false + @chore = nil + @timer = nil + @angle = 0.0 + @size = 0.5 + end + + # Create and initialize + def create + super + show(PLACEMENT_SCREEN) + end +end + +if __FILE__ == $0 + # Construct the application + application = FXApp.new("GLTest", "FoxTest") + + # To ensure that the chores-based spin will run as fast as possible, + # we can disable the chore in FXRuby's event loop that tries to schedule + # other threads. This is OK for this program because there aren't any + # other Ruby threads running. 
+ + #application.disableThreads + + # Construct the main window + GLTestWindow.new(application) + + # Create the app's windows + application.create + + # Run the application + application.run +end + +class Facelet + attr_accessor :color + def initialize(color) + @color = color + end + + def to_s + @color + end +end + +class Edge + attr_accessor :facelets, :colors + + def initialize(facelets) + @facelets = facelets + @colors = @facelets.map { |fl| fl.color } + end + + def apply(edge) + @facelets.each_with_index { |fl, i| + fl.color = edge.colors[i] + } + end + + def inspect + "\n%s %s\n%s %s %s" % facelets + end +end + +class Side + attr_reader :num, :facelets + attr_accessor :sides + + def initialize(num) + @num = num + @sides = [] + @facelets = [] + @fl_by_side = {} + end + + # facelets & sides + # 0 + # 0 1 2 + # 3 3 4 5 1 + # 6 7 8 + # 2 + + def facelets=(facelets) + @facelets = facelets.map { |c| Facelet.new(c) } + init_facelet 0, 3,0 + init_facelet 1, 0 + init_facelet 2, 0,1 + init_facelet 3, 3 + init_facelet 5, 1 + init_facelet 6, 2,3 + init_facelet 7, 2 + init_facelet 8, 1,2 + end + + def <=>(side) + self.num <=> side.num + end + + def init_facelet(pos, *side_nums) + sides = side_nums.map { |num| @sides[num] }.sort + @fl_by_side[sides] = pos + end + + def []=(color, *sides) + @facelets[@fl_by_side[sides.sort]].color = color + end + + def values_at(*sides) + sides.map { |sides| @facelets[@fl_by_side[sides.sort]] } + end + + def inspect(range=nil) + if range + @facelets.values_at(*(range.to_a)).join(' ') + else + <<-EOS.gsub(/\d/) { |num| @facelets[num.to_i] }.gsub(/[ABCD]/) { |side| @sides[side[0]-?A].num.to_s } + A + 0 1 2 + D 3 4 5 B + 6 7 8 + C + EOS + end + end + + def get_edge(side) + trio = (-1..1).map { |x| (side + x) % 4 } + prev_side, this_side, next_side = @sides.values_at(*trio) + e = Edge.new( + self .values_at( [this_side], [this_side, next_side] ) + + this_side.values_at( [self, prev_side], [self ], [self, next_side] ) + ) + #puts 'Edge created 
for side %d: ' % side + e.inspect + e + end + + def turn(dir) + #p 'turn side %d in %d' % [num, dir] + edges = (0..3).map { |n| get_edge n } + for i in 0..3 + edges[i].apply edges[(i-dir) % 4] + end + end +end + +class Cube + def initialize + @sides = [] + %w(left front right back top bottom).each_with_index { |side, i| + eval("@sides[#{i}] = @#{side} = Side.new(#{i})") + } + @left.sides = [@top, @front, @bottom, @back] + @front.sides = [@top, @right, @bottom, @left] + @right.sides = [@top, @back, @bottom, @front] + @back.sides = [@top, @left, @bottom, @right] + @top.sides = [@back, @right, @front, @left] + @bottom.sides = [@front, @right, @back, @left] + end + + def read_facelets(fs) + pattern = Regexp.new(<<-EOP.gsub(/\w/, '\w').gsub(/\s+/, '\s*')) + (w w w) + (w w w) + (w w w) +(r r r) (g g g) (b b b) (o o o) +(r r r) (g g g) (b b b) (o o o) +(r r r) (g g g) (b b b) (o o o) + (y y y) + (y y y) + (y y y) + EOP + md = pattern.match(fs).to_a + + @top.facelets = parse_facelets(md.values_at(1,2,3)) + @left.facelets = parse_facelets(md.values_at(4,8,12)) + @front.facelets = parse_facelets(md.values_at(5,9,13)) + @right.facelets = parse_facelets(md.values_at(6,10,14)) + @back.facelets = parse_facelets(md.values_at(7,11,15)) + @bottom.facelets = parse_facelets(md.values_at(16,17,18)) + end + + def turn(side, dir) + #p 'turn %d in %d' % [side, dir] + @sides[side].turn(dir) + #puts inspect + end + + def inspect + <<-EOF.gsub(/(\d):(\d)-(\d)/) { @sides[$1.to_i].inspect(Range.new($2.to_i, $3.to_i)) } + 4:0-2 + 4:3-5 + 4:6-8 +0:0-2 1:0-2 2:0-2 3:0-2 +0:3-5 1:3-5 2:3-5 3:3-5 +0:6-8 1:6-8 2:6-8 3:6-8 + 5:0-2 + 5:3-5 + 5:6-8 + EOF + end + +private + def parse_facelets(rows) + rows.join.delete(' ').split(//) + end +end + +#$stdin = DATA + +gets.to_i.times do |i| + puts "Scenario ##{i+1}:" + fs = '' + 9.times { fs << gets } + cube = Cube.new + cube.read_facelets fs + gets.to_i.times do |t| + side, dir = gets.split.map {|s| s.to_i} + cube.turn(side, dir) + end + puts cube.inspect 
+ puts +end + +# 2004 by murphy <korny@cYcnus.de> +# GPL +class Scenario + class TimePoint + attr_reader :data + def initialize *data + @data = data + end + + def [] i + @data[i] or 0 + end + + include Comparable + def <=> tp + r = 0 + [@data.size, tp.data.size].max.times do |i| + r = self[i] <=> tp[i] + return r if r.nonzero? + end + 0 + end + + def - tp + r = [] + [@data.size, tp.data.size].max.times do |i| + r << self[i] - tp[i] + end + r + end + + def inspect + # 01/01/1800 00:00:00 + '%02d/%02d/%04d %02d:%02d:%02d' % @data.values_at(1, 2, 0, 3, 4, 5) + end + end + + ONE_HOUR = TimePoint.new 0, 0, 0, 1, 0, 0 + + APPOINTMENT_PATTERN = / + ( \d{4} ) \s ( \d{2} ) \s ( \d{2} ) \s ( \d{2} ) \s ( \d{2} ) \s ( \d{2} ) \s + ( \d{4} ) \s ( \d{2} ) \s ( \d{2} ) \s ( \d{2} ) \s ( \d{2} ) \s ( \d{2} ) + /x + + def initialize io + @team_size = io.gets.to_i + @data = [ [TimePoint.new(1800, 01, 01, 00, 00, 00), @team_size] ] + @team_size.times do # each team member + io.gets.to_i.times do # each appointment + m = APPOINTMENT_PATTERN.match io.gets + @data << [TimePoint.new(*m.captures[0,6].map { |x| x.to_i }), -1] + @data << [TimePoint.new(*m.captures[6,6].map { |x| x.to_i }), +1] + end + end + @data << [TimePoint.new(2200, 01, 01, 00, 00, 00), -@team_size] + end + + def print_time_plan + n = 0 + appointment = nil + no_appointment = true + @data.sort_by { |x| x[0] }.each do |x| + tp, action = *x + n += action + # at any time during the meeting, at least two team members need to be there + # and at most one team member is allowed to be absent + if n >= 2 and (@team_size - n) <= 1 + appointment ||= tp + else + if appointment + # the meeting should be at least one hour in length + if TimePoint.new(*(tp - appointment)) >= ONE_HOUR + puts 'appointment possible from %p to %p' % [appointment, tp] + no_appointment = false + end + appointment = false + end + end + end + puts 'no appointment possible' if no_appointment + end +end + +# read the data +DATA.gets.to_i.times do |si| # each 
scenario + puts 'Scenario #%d:' % (si + 1) + sc = Scenario.new DATA + sc.print_time_plan + puts +end + +#__END__ +2 +3 +3 +2002 06 28 15 00 00 2002 06 28 18 00 00 TUD Contest Practice Session +2002 06 29 10 00 00 2002 06 29 15 00 00 TUD Contest +2002 11 15 15 00 00 2002 11 17 23 00 00 NWERC Delft +4 +2002 06 25 13 30 00 2002 06 25 15 30 00 FIFA World Cup Semifinal I +2002 06 26 13 30 00 2002 06 26 15 30 00 FIFA World Cup Semifinal II +2002 06 29 13 00 00 2002 06 29 15 00 00 FIFA World Cup Third Place +2002 06 30 13 00 00 2002 06 30 15 00 00 FIFA World Cup Final +1 +2002 06 01 00 00 00 2002 06 29 18 00 00 Preparation of Problem Set +2 +1 +1800 01 01 00 00 00 2200 01 01 00 00 00 Solving Problem 8 +0 + +require 'token_consts' +require 'symbol' +require 'ctype' +require 'error' + +class Fixnum + # Treat char as a digit and return it's value as Fixnum. + # Returns nonsense for non-digits. + # Examples: + # <code> + # RUBY_VERSION[0].digit == '1.8.2'[0].digit == 1 + # </code> + # + # <code> + # ?6.digit == 6 + # </code> + # + # <code> + # ?A.digit == 17 + # </code> + def digit + self - ?0 + end +end + +## +# Stellt einen einfachen Scanner fr die lexikalische Analyse der Sprache Pas-0 dar. +# +# @author Andreas Kunert +# Ruby port by murphy +class Scanner + + include TokenConsts + + attr_reader :line, :pos + + # To allow Scanner.new without parameters. + DUMMY_INPUT = 'dummy file' + def DUMMY_INPUT.getc + nil + end + + ## + # Erzeugt einen Scanner, der als Eingabe das bergebene IO benutzt. + def initialize input = DUMMY_INPUT + @line = 1 + @pos = 0 + + begin + @input = input + @next_char = @input.getc + rescue IOError # TODO show the reason! + Error.ioError + raise + end + end + + ## + # Liest das n + def read_next_char + begin + @pos += 1 + @current_char = @next_char + @next_char = @input.getc + rescue IOError + Error.ioError + raise + end + end + + ## + # Sucht das n + # PascalSymbol-Objekt und gibt es zurck. 
+ # @see Symbol + # @return das gefundene Symbol als PascalSymbol-Objekt + def get_symbol + current_symbol = nil + until current_symbol + read_next_char + + if @current_char.alpha? + identifier = @current_char.chr + while @next_char.alpha? or @next_char.digit? + identifier << @next_char + read_next_char + end + current_symbol = handle_identifier(identifier.upcase) + elsif @current_char.digit? + current_symbol = number + else + case @current_char + when ?\s + # ignore + when ?\n + new_line + when nil + current_symbol = PascalSymbol.new EOP + when ?{ + comment + + when ?: + if @next_char == ?= + read_next_char + current_symbol = PascalSymbol.new BECOMES + else + current_symbol = PascalSymbol.new COLON + end + + when ?< + if (@next_char == ?=) + read_next_char + current_symbol = PascalSymbol.new LEQSY + elsif (@next_char == ?>) + read_next_char + current_symbol = PascalSymbol.new NEQSY + else + current_symbol = PascalSymbol.new LSSSY + end + + when ?> + if (@next_char == ?=) + read_next_char + current_symbol = PascalSymbol.new GEQSY + else + current_symbol = PascalSymbol.new GRTSY + end + + when ?. then current_symbol = PascalSymbol.new PERIOD + when ?( then current_symbol = PascalSymbol.new LPARENT + when ?, then current_symbol = PascalSymbol.new COMMA + when ?* then current_symbol = PascalSymbol.new TIMES + when ?/ then current_symbol = PascalSymbol.new SLASH + when ?+ then current_symbol = PascalSymbol.new PLUS + when ?- then current_symbol = PascalSymbol.new MINUS + when ?= then current_symbol = PascalSymbol.new EQLSY + when ?) then current_symbol = PascalSymbol.new RPARENT + when ?; then current_symbol = PascalSymbol.new SEMICOLON + else + Error.error(100, @line, @pos) if @current_char > ?\s + end + end + end + current_symbol + end + +private + ## + # Versucht, in dem gegebenen String ein Schlsselwort zu erkennen. 
+ # Sollte dabei ein Keyword gefunden werden, so gibt er ein PascalSymbol-Objekt zurck, das + # das entsprechende Keyword repr + # einem SymbolIdent-Objekt (abgeleitet von PascalSymbol), das den String 1:1 enth + # @see symbol + # @return falls Keyword gefunden, zugeh + def handle_identifier identifier + if sym = KEYWORD_SYMBOLS[identifier] + PascalSymbol.new sym + else + SymbolIdent.new identifier + end + end + + MAXINT = 2**31 - 1 + MAXINT_DIV_10 = MAXINT / 10 + MAXINT_MOD_10 = MAXINT % 10 + ## + # Versucht, aus dem gegebenen Zeichen und den folgenden eine Zahl zusammenzusetzen. + # Dabei wird der relativ intuitive Algorithmus benutzt, die endgltige Zahl bei + # jeder weiteren Ziffer mit 10 zu multiplizieren und diese dann mit der Ziffer zu + # addieren. Sonderf + # <BR> + # Treten dabei kein Punkt oder ein E auf, so gibt diese Methode ein SymbolIntCon-Objekt + # zurck, ansonsten (reelle Zahl) ein SymbolRealCon-Objekt. Beide Symbole enthalten + # jeweils die Zahlwerte. + # <BR> + # Anmerkung: Diese Funktion ist mit Hilfe der Java/Ruby-API deutlich leichter zu realisieren. + # Sie wurde dennoch so implementiert, um den Algorithmus zu demonstrieren + # @see symbol + # @return SymbolIntcon- oder SymbolRealcon-Objekt, das den Zahlwert enth + def number + is_integer = true + integer_too_long = false + exponent = 0 + exp_counter = -1 + exp_sign = 1 + + integer_mantisse = @current_char.digit + + while (@next_char.digit? and integer_mantisse < MAXINT_DIV_10) or + (integer_mantisse == MAXINT_DIV_10 and @next_char.digit <= MAXINT_MOD_10) + integer_mantisse *= 10 + integer_mantisse += @next_char.digit + read_next_char + end + + real_mantisse = integer_mantisse + + while @next_char.digit? + integer_too_long = true + real_mantisse *= 10 + real_mantisse += @next_char.digit + read_next_char + end + if @next_char == ?. + read_next_char + is_integer = false + unless @next_char.digit? + Error.error 101, @line, @pos + end + while @next_char.digit? 
+ real_mantisse += @next_char.digit * (10 ** exp_counter) + read_next_char + exp_counter -= 1 + end + end + if @next_char == ?E + is_integer = false + read_next_char + if @next_char == ?- + exp_sign = -1 + read_next_char + end + unless @next_char.digit? + Error.error 101, @line, @pos + end + while @next_char.digit? + exponent *= 10 + exponent += @next_char.digit + read_next_char + end + end + + if is_integer + if integer_too_long + Error.error 102, @line, @pos + end + SymbolIntcon.new integer_mantisse + else + SymbolRealcon.new real_mantisse * (10 ** (exp_sign * exponent)) + end + end + + ## + # Sorgt fr ein + # Es werden einfach alle Zeichen bis zu einer schlie + # und verworfen. + def comment + while @current_char != ?} + forbid_eop + new_line if @current_char == ?\n + read_next_char + end + end + + def new_line + @line += 1 + @pos = 0 + end + + def forbid_eop + if eop? + Error.error 103, @line, @pos + end + exit + end + + def eop? + @current_char.nil? + end +end + +## +# L +# Dieses erzeugt sich ein Scanner-Objekt und ruft an diesem kontinuierlich bis zum Dateiende +# get_symbol auf. 
if $0 == __FILE__
  # Block form so the source file is closed again when scanning ends.
  File.open(ARGV[0] || 'test.pas') do |source|
    scan = Scanner.new(source)
    loop do
      c = scan.get_symbol
      puts c
      break if c.typ == TokenConsts::EOP
    end
  end
end
# -*- ruby -*-

# Local variables:
# indent-tabs-mode: nil
# ruby-indent-level: 4
# End:

# @@PLEAC@@_NAME
# @@SKIP@@ Ruby

# @@PLEAC@@_WEB
# @@SKIP@@ http://www.ruby-lang.org


# @@PLEAC@@_1.0
string = '\n'                     # two characters, \ and an n
string = 'Jon \'Maddog\' Orwant'  # literal single quotes

string = "\n"                     # a "newline" character
string = "Jon \"Maddog\" Orwant"  # literal double quotes

string = %q/Jon 'Maddog' Orwant/  # literal single quotes

string = %q[Jon 'Maddog' Orwant]  # literal single quotes
string = %q{Jon 'Maddog' Orwant}  # literal single quotes
string = %q(Jon 'Maddog' Orwant)  # literal single quotes
string = %q<Jon 'Maddog' Orwant>  # literal single quotes

a = <<"EOF"
This is a multiline here document
terminated by EOF on a line by itself
EOF


# @@PLEAC@@_1.1
value = string[offset,count]
value = string[offset..-1]

string[offset,count] = newstring
string[offset..-1] = newtail

# in Ruby we can also specify intervals by their two offsets
value = string[offset..offs2]
string[offset..offs2] = newstring

leading, s1, s2, trailing = data.unpack("A5 x3 A8 A8 A*")

fivers = string.unpack("A5" * (string.length/5))

chars = string.unpack("A1" * string.length)

string = "This is what you have"
# +012345678901234567890 Indexing forwards (left to right)
# 109876543210987654321- Indexing backwards (right to left)
# note that 0 means 10 or 20, etc. above

first = string[0, 1]   # "T"
start = string[5, 2]   # "is"
rest = string[13..-1]  # "you have"
last = string[-1, 1]   # "e"
end_ = string[-4..-1]  # "have"
piece = string[-8, 3]  # "you"

string[5, 2] = "wasn't"      # change "is" to "wasn't"
string[-12..-1] = "ondrous"  # "This wasn't wondrous"
string[0, 1] = ""            # delete first character
string[-10..-1] = ""         # delete last 10 characters

if string[-10..-1] =~ /pattern/
    puts "Pattern matches in last 10 characters"
end

# string[0, 5] returns a new String, so gsub! on it changed a throw-away
# copy; assign the result back to the slice instead.
string[0, 5] = string[0, 5].gsub(/is/, 'at')

a = "make a hat"
a[0, 1], a[-1, 1] = a[-1, 1], a[0, 1]

a = "To be or not to be"
b = a.unpack("x6 A6")

b, c = a.unpack("x6 A2 X5 A2")
puts "#{b}\n#{c}\n"

# Turns a list of 1-based cut positions into an unpack template made of
# fixed-width "A<n>" fields followed by a final "A*".
def cut2fmt(*args)
    template = ''
    lastpos = 1
    for place in args
        template += "A" + (place - lastpos).to_s + " "
        lastpos = place
    end
    template += "A*"
    return template
end

fmt = cut2fmt(8, 14, 20, 26, 30)


# @@PLEAC@@_1.2
# careful! "b is true" doesn't mean "b != 0" (0 is true in Ruby)
# thus no problem of "defined" later since only nil and false are false
# the following sets to `c' if `b' is nil or false
a = b || c

# if you need Perl's behaviour (setting to `c' if `b' is 0) the most
# effective way is to use Numeric#nonzero? (thanks to Dave Thomas!)
a = b.nonzero? || c

# you will still want to use defined? in order to test
# for scope existence of a given object
a = defined?(b) ? b : c

dir = ARGV.shift || "/tmp"


# @@PLEAC@@_1.3
v1, v2 = v2, v1

alpha, beta, production = %w(January March August)
alpha, beta, production = beta, production, alpha


# @@PLEAC@@_1.4
num = char[0]
char = num.chr

# Ruby also supports having a char from character constant
num = ?r

char = sprintf("%c", num)
printf("Number %d is character %c\n", num, num)

ascii = string.unpack("C*")
string = ascii.pack("C*")

hal = "HAL"
ascii = hal.unpack("C*")
# We can't use Array#each since we can't mutate a Fixnum
ascii.collect! { |i|
    i + 1  # add one to each ASCII value
}
ibm = ascii.pack("C*")
puts ibm


# @@PLEAC@@_1.5
array = string.split('')

array = string.unpack("C*")

string.scan(/./) { |b|
    # do something with b
}

string = "an apple a day"
print "unique chars are: ", string.split('').uniq.sort, "\n"

sum = 0
for ascval in string.unpack("C*")  # or use Array#each for a pure OO style :)
    sum += ascval
end
puts "sum is #{sum & 0xffffffff}"  # since Ruby will go Bignum if necessary

# @@INCLUDE@@ include/ruby/slowcat.rb


# @@PLEAC@@_1.6
revbytes = string.reverse

revwords = string.split(" ").reverse.join(" ")

revwords = string.split(/(\s+)/).reverse.join

# using the fact that IO is Enumerable, you can directly "select" it
# (block form of File.open, so the dictionary file is closed afterwards)
long_palindromes = File.open("/usr/share/dict/words") { |words|
    words.select { |w| w.chomp!; w.reverse == w && w.length > 5 }
}


# @@PLEAC@@_1.7
# The pattern must be a Regexp: the String "\t+" only matches a tab that is
# followed by a literal plus sign.
while string.sub!(/\t+/) { ' ' * ($&.length * 8 - $`.length % 8) }
end


# @@PLEAC@@_1.8
'You owe #{debt} to me'.gsub(/\#{(\w+)}/) { eval($1) }

rows, cols = 24, 80
text = %q(I am #{rows} high and #{cols} long)
text.gsub!(/\#{(\w+)}/) { eval("#{$1}") }
puts text

'I am 17 years old'.gsub(/\d+/) { 2 * $&.to_i }


# @@PLEAC@@_1.9
e = "bo peep".upcase
e.downcase!
e.capitalize!

"thIS is a loNG liNE".gsub!(/\w+/) { $&.capitalize }


# @@PLEAC@@_1.10
"I have #{n+1} guanacos."
print "I have ", n+1, " guanacos."


# @@PLEAC@@_1.11
var = <<'EOF'.gsub(/^\s+/, '')
    your text
    goes here
EOF


# @@PLEAC@@_1.12
string = "Folding and splicing is the work of an editor,\n"+
    "not a mere collection of silicon\n"+
    "and\n"+
    "mobile electrons!"

# Re-flows the words of +str+ into lines shorter than +max_size+ characters.
# A single word that is too long is kept on a line of its own.
def wrap(str, max_size)
    all = []
    line = ''
    for l in str.split
        # Count the joining space too, and never flush an empty line: the
        # old test ignored the space, so it did not produce the output
        # documented below, and an over-long first word caused a blank line.
        if line != '' && (line + ' ' + l).length >= max_size
            all.push(line)
            line = ''
        end
        line += line == '' ? l : ' ' + l
    end
    all.push(line).join("\n")
end

print wrap(string, 20)
#=> Folding and
#=> splicing is the
#=> work of an editor,
#=> not a mere
#=> collection of
#=> silicon and mobile
#=> electrons!


# @@PLEAC@@_1.13
string = %q(Mom said, "Don't do that.")
string.gsub(/['"]/) { '\\'+$& }
string.gsub(/['"]/, '\&\&')
string.gsub(/[^A-Z]/) { '\\'+$& }
"is a test!".gsub(/\W/) { '\\'+$& }  # Regexp.escape is the closest thing to quotemeta


# @@PLEAC@@_1.14
string.strip!


# @@PLEAC@@_1.15
# Splits one line of comma-separated values; double-quoted fields may
# contain commas and backslash-escaped quotes.
def parse_csv(text)
    new = text.scan(/"([^\"\\]*(?:\\.[^\"\\]*)*)",?|([^,]+),?|,/)
    new << nil if text[-1] == ?,
    new.flatten.compact
end

line = %q<XYZZY,"","O'Reilly, Inc","Wall, Larry","a \"glug\" bit,",5,"Error, Core Dumped">
fields = parse_csv(line)
fields.each_with_index { |v,i|
    print "#{i} : #{v}\n";
}


# @@PLEAC@@_1.16
# Use the soundex.rb Library from Michael Neumann.
# http://www.s-direktnet.de/homepages/neumann/rb_prgs/Soundex.rb
require 'Soundex'

code = Text::Soundex.soundex(string)
codes = Text::Soundex.soundex(array)

# substitution function for getpwent():
# returns an array of user entries,
# each entry contains the username and the full name
def login_names
    result = []
    File.open("/etc/passwd") { |file|
        file.each_line { |line|
            next if line.match(/^#/)
            cols = line.split(":")
            result.push([cols[0], cols[4]])
        }
    }
    result
end

puts "Lookup user: "
user = STDIN.gets
user.chomp! if user  # gets returns nil at end of input
exit unless user
name_code = Text::Soundex.soundex(user)

splitter = Regexp.new('(\w+)[^,]*\b(\w+)')
for username, fullname in login_names do
    # Skip entries whose full-name field is missing or has fewer than two
    # words; indexing the nil match result used to raise here.
    name_match = splitter.match(fullname.to_s)
    next unless name_match
    firstname, lastname = name_match[1,2]
    # The operators must end the line: a line that starts with || is not a
    # continuation for the Ruby versions this code targets.
    if name_code == Text::Soundex.soundex(username) ||
       name_code == Text::Soundex.soundex(firstname) ||
       name_code == Text::Soundex.soundex(lastname)
        puts "#{username}: #{firstname} #{lastname}"
    end
end


# @@PLEAC@@_1.17
# @@INCLUDE@@ include/ruby/fixstyle.rb


# @@PLEAC@@_1.18
# @@INCLUDE@@ include/ruby/psgrep.rb


# @@PLEAC@@_2.1
# Matz tells that you can use Integer() for strict checked conversion.
Integer("abc")
#=> `Integer': invalid value for Integer: "abc" (ArgumentError)
Integer("567")
#=> 567

# You may use Float() for floating point stuff
Integer("56.7")
#=> `Integer': invalid value for Integer: "56.7" (ArgumentError)
Float("56.7")
#=> 56.7

# You may also use a regexp for that
if string =~ /^[+-]?\d+$/
    p 'is an integer'
else
    p 'is not'
end

if string =~ /^-?(?:\d+(?:\.\d*)?|\.\d+)$/
    p 'is a decimal number'
else
    p 'is not'
end


# @@PLEAC@@_2.2
# equal(num1, num2, accuracy) : returns true if num1 and num2 are
# equal when formatted with accuracy significant digits
# (%g counts significant digits, not decimal places)
def equal(i, j, a)
    sprintf("%.#{a}g", i) == sprintf("%.#{a}g", j)
end

wage = 536        # $5.36/hour
week = 40 * wage  # $214.40
printf("One week's wage is: \$%.2f\n", week/100.0)


# @@PLEAC@@_2.3
num.round  # rounds to integer

a = 0.255
b = sprintf("%.2f", a)
print "Unrounded: #{a}\nRounded: #{b}\n"
printf "Unrounded: #{a}\nRounded: %.2f\n", a

print "number\tint\tfloor\tceil\n"
a = [ 3.3 , 3.5 , 3.7, -3.3 ]
for n in a
    printf("% .1f\t% .1f\t% .1f\t% .1f\n",  # at least I don't fake my output :)
           n, n.to_i, n.floor, n.ceil)
end


# @@PLEAC@@_2.4
# Integer => binary digit string without leading zeros.
def dec2bin(n)
    [n].pack("N").unpack("B32")[0].sub(/^0+(?=\d)/, '')
end

# Binary digit string => integer (only the last 32 digits are used).
def bin2dec(n)
    [("0"*32+n.to_s)[-32..-1]].pack("B32").unpack("N")[0]
end


# @@PLEAC@@_2.5
for i in x .. y
    # i is set to every integer from x to y, inclusive
end

x.step(y,7) { |i|
    # i is set to every integer from x to y, stepsize = 7
}

print "Infancy is: "
(0..2).each { |i|
    print i, " "
}
print "\n"


# @@PLEAC@@_2.6
# We can add conversion methods to the Integer class,
# this makes a roman number just a representation for normal numbers.
class Integer

    # Symbols ordered from the largest value down; both conversions rely
    # on this order.
    @@romanlist = [["M", 1000],
                   ["CM", 900],
                   ["D", 500],
                   ["CD", 400],
                   ["C", 100],
                   ["XC", 90],
                   ["L", 50],
                   ["XL", 40],
                   ["X", 10],
                   ["IX", 9],
                   ["V", 5],
                   ["IV", 4],
                   ["I", 1]]

    # Greedily emits the largest symbol that still fits.
    def to_roman
        remains = self
        roman = ""
        for sym, num in @@romanlist
            while remains >= num
                remains -= num
                roman << sym
            end
        end
        roman
    end

    # Consumes symbols from the front of the (upcased) string; characters
    # that match no symbol are ignored.
    def Integer.from_roman(roman)
        ustr = roman.upcase
        sum = 0
        for entry in @@romanlist
            sym, num = entry[0], entry[1]
            while sym == ustr[0, sym.length]
                sum += num
                ustr.slice!(0, sym.length)
            end
        end
        sum
    end

end


roman_fifteen = 15.to_roman
puts "Roman for fifteen is #{roman_fifteen}"
i = Integer.from_roman(roman_fifteen)
puts "Converted back, #{roman_fifteen} is #{i}"

# check
for i in (1..3900)
    r = i.to_roman
    j = Integer.from_roman(r)
    if i != j
        puts "error: #{i} : #{r} - #{j}"
    end
end


# @@PLEAC@@_2.7
random = rand(y-x+1)+x

chars = ["A".."Z","a".."z","0".."9"].collect { |r| r.to_a }.join + %q(!@$%^&*)
password = (1..8).collect { chars[rand(chars.size)] }.pack("C*")


# @@PLEAC@@_2.8
srand       # uses a combination of the time, the process id, and a sequence number
srand(val)  # for repeatable behaviour


# @@PLEAC@@_2.9
# from the randomr lib:
# http://raa.ruby-lang.org/project/randomr/
# ----> http://raa.ruby-lang.org/project/randomr/
# (the arrow line above used to be outside a comment, where it is not Ruby)

require 'random/mersenne_twister'
mers = Random::MersenneTwister.new 123456789
puts mers.rand(0)   # 0.550321932544541
puts mers.rand(10)  # 2

# using online sources of random data via the realrand package:
# http://raa.ruby-lang.org/project/realrand/
# **Note**
# The following online services are used in this package:
# http://www.random.org - source: atmospheric noise
# http://www.fourmilab.ch/hotbits - source: radioactive decay timings
# http://random.hd.org - source: entropy from local and network noise
# Please visit the sites and respect the rules of each service.

require 'random/online'

generator1 = Random::RandomOrg.new
puts generator1.randbyte(5).join(",")
puts generator1.randnum(10, 1, 6).join(",")  # Roll dice 10 times.

generator2 = Random::FourmiLab.new
puts generator2.randbyte(5).join(",")
# randnum is not supported.

generator3 = Random::EntropyPool.new
puts generator3.randbyte(5).join(",")
# randnum is not supported.


# @@PLEAC@@_2.10
# Returns a pair of values drawn from a standard normal distribution
# (polar Box-Muller method).
def gaussian_rand
    begin
        u1 = 2 * rand() - 1
        u2 = 2 * rand() - 1
        w = u1*u1 + u2*u2
    # w == 0 has to be rejected as well: log(0)/0 below is not a number.
    end while w >= 1 || w == 0
    w = Math.sqrt((-2*Math.log(w))/w)
    [ u2*w, u1*w ]
end

mean = 25
sdev = 2
salary = gaussian_rand[0] * sdev + mean
printf("You have been hired at \$%.2f\n", salary)


# @@PLEAC@@_2.11
def deg2rad(d)
    (d/180.0)*Math::PI
end

def rad2deg(r)
    (r/Math::PI)*180
end


# @@PLEAC@@_2.12
sin_val = Math.sin(angle)
cos_val = Math.cos(angle)
tan_val = Math.tan(angle)

# AFAIK Ruby's Math module doesn't provide acos/asin
# While we're at it, let's also define missing hyperbolic functions
module Math
    def Math.asin(x)
        atan2(x, sqrt(1 - x**2))
    end
    def Math.acos(x)
        atan2(sqrt(1 - x**2), x)
    end
    def Math.atan(x)
        atan2(x, 1)
    end
    def Math.sinh(x)
        (exp(x) - exp(-x)) / 2
    end
    def Math.cosh(x)
        (exp(x) + exp(-x)) / 2
    end
    def Math.tanh(x)
        sinh(x) / cosh(x)
    end
end

# The support for Complex numbers is not built-in
y = Math.acos(3.7)
#=> in `sqrt': square root for negative number (ArgumentError)

# There is an implementation of Complex numbers in 'complex.rb' in current
# Ruby distro, but it doesn't support atan2 with complex args, so it doesn't
# solve this problem.


# @@PLEAC@@_2.13
log_e = Math.log(val)
log_10 = Math.log10(val)

# Logarithm of +val+ to an arbitrary +base+.
def log_base(base, val)
    Math.log(val)/Math.log(base)
end

answer = log_base(10, 10_000)
puts "log10(10,000) = #{answer}"


# @@PLEAC@@_2.14
require 'matrix.rb'

a = Matrix[[3, 2, 3], [5, 9, 8]]
b = Matrix[[4, 7], [9, 3], [8, 1]]
c = a * b

a.row_size
a.column_size

c.det
a.transpose


# @@PLEAC@@_2.15
require 'complex.rb'
require 'rational.rb'

a = Complex(3, 5)   # 3 + 5i
b = Complex(2, -2)  # 2 - 2i
puts "c = #{a*b}"

c = a * b
d = 3 + 4*Complex::I

printf "sqrt(#{d}) = %s\n", Math.sqrt(d)


# @@PLEAC@@_2.16
number = hexadecimal.hex
number = octal.oct

print "Gimme a number in decimal, octal, or hex: "
# gets returns nil at end of input. The old defined?(num) test was always
# true and ran after chomp had already been called on that nil.
num = gets
exit unless num
num = num.chomp
num = num.oct if num =~ /^0/  # does both oct and hex
printf "%d %x %o\n", num, num, num

print "Enter file permission in octal: "
permissions = gets
raise "Exiting ...\n" unless permissions
permissions = permissions.chomp
puts "The decimal value is #{permissions.oct}"


# @@PLEAC@@_2.17
# Inserts thousands separators into the integer part of +n+.
# Works on the reversed digits so groups of three are counted from the right.
def commify(n)
    n.to_s =~ /([^\.]*)(\..*)?/
    int, dec = $1.reverse, $2 || ""
    # gsub! returns nil once nothing is left to change, which ends the loop
    while int.gsub!(/(,|\.|^)(\d{3})(\d)/, '\1\2,\3')
    end
    int.reverse + dec
end


# @@PLEAC@@_2.18
printf "It took %d hour%s\n", time, time == 1 ? "" : "s"

# dunno if an equivalent to Lingua::EN::Inflect exists...
+ + +# @@PLEAC@@_2.19 +#----------------------------- +#!/usr/bin/ruby +# bigfact - calculating prime factors +def factorize(orig) + factors = {} + factors.default = 0 # return 0 instead nil if key not found in hash + n = orig + i = 2 + sqi = 4 # square of i + while sqi <= n do + while n.modulo(i) == 0 do + n /= i + factors[i] += 1 + # puts "Found factor #{i}" + end + # we take advantage of the fact that (i +1)**2 = i**2 + 2*i +1 + sqi += 2 * i + 1 + i += 1 + end + + if (n != 1) && (n != orig) + factors[n] += 1 + end + factors +end + +def printfactorhash(orig, factorcount) + print format("%-10d ", orig) + if factorcount.length == 0 + print "PRIME" + else + # sorts after number, because the hash keys are numbers + factorcount.sort.each { |factor,exponent| + print factor + if exponent > 1 + print "**", exponent + end + print " " + } + end + puts +end + +for arg in ARGV + n = arg.to_i + mfactors = factorize(n) + printfactorhash(n, mfactors) +end +#----------------------------- + + +# @@PLEAC@@_3.0 +puts Time.now + +print "Today is day ", Time.now.yday, " of the current year.\n" +print "Today is day ", Time.now.day, " of the current month.\n" + + +# @@PLEAC@@_3.1 +day, month, year = Time.now.day, Time.now.month, Time.now.year +# or +day, month, year = Time.now.to_a[3..5] + +tl = Time.now.localtime +printf("The current date is %04d %02d %02d\n", tl.year, tl.month, tl.day) + +Time.now.localtime.strftime("%Y-%m-%d") + + +# @@PLEAC@@_3.2 +Time.local(year, month, day, hour, minute, second).tv_sec +Time.gm(year, month, day, hour, minute, second).tv_sec + + +# @@PLEAC@@_3.3 +sec, min, hour, day, month, year, wday, yday, isdst, zone = Time.at(epoch_secs).to_a + + +# @@PLEAC@@_3.4 +when_ = now + difference # now -> Time ; difference -> Numeric (delta in seconds) +then_ = now - difference + + +# @@PLEAC@@_3.5 +bree = 361535725 +nat = 96201950 + +difference = bree - nat +puts "There were #{difference} seconds between Nat and Bree" + +seconds = difference % 60 +difference = 
(difference - seconds) / 60 +minutes = difference % 60 +difference = (difference - minutes) / 60 +hours = difference % 24 +difference = (difference - hours) / 24 +days = difference % 7 +weeks = (difference - days) / 7 + +puts "(#{weeks} weeks, #{days} days, #{hours}:#{minutes}:#{seconds})" + + +# @@PLEAC@@_3.6 +monthday, weekday, yearday = date.mday, date.wday, date.yday + +# AFAIK the week number is not just a division since week boundaries are on sundays +weeknum = d.strftime("%U").to_i + 1 + +year = 1981 +month = "jun" # or `6' if you want to emulate a broken language +day = 16 +t = Time.mktime(year, month, day) +print "#{month}/#{day}/#{year} was a ", t.strftime("%A"), "\n" + + +# @@PLEAC@@_3.7 +yyyy, mm, dd = $1, $2, $3 if "1998-06-25" =~ /(\d+)-(\d+)-(\d+)/ + +epoch_seconds = Time.mktime(yyyy, mm, dd).tv_sec + +# dunno an equivalent to Date::Manip#ParseDate + + +# @@PLEAC@@_3.8 +string = Time.at(epoch_secs) +Time.at(1234567890).gmtime # gives: Fri Feb 13 23:31:30 UTC 2009 + +time = Time.mktime(1973, "jan", 18, 3, 45, 50) +print "In localtime it gives: ", time.localtime, "\n" + + +# @@PLEAC@@_3.9 +# Ruby provides micro-seconds in Time object +Time.now.usec + +# Ruby gives the seconds in floating format when substracting two Time objects +before = Time.now +line = gets +elapsed = Time.now - before +puts "You took #{elapsed} seconds." + +# On my Celeron-400 with Linux-2.2.19-14mdk, average for three execs are: +# This Ruby version: average 0.00321 sec +# Cookbook's Perl version: average 0.00981 sec +size = 500 +number_of_times = 100 +total_time = 0 +number_of_times.times { + # populate array + array = [] + size.times { array << rand } + # sort it + begin_ = Time.now + array.sort! 
+ time = Time.now - begin_ + total_time += time +} +printf "On average, sorting %d random numbers takes %.5f seconds\n", + size, (total_time/Float(number_of_times)) + + +# @@PLEAC@@_3.10 +sleep(0.005) # Ruby is definitely not as broken as Perl :) +# (may be interrupted by sending the process a SIGALRM) + + +# @@PLEAC@@_3.11 +#!/usr/bin/ruby -w +# hopdelta - feed mail header, produce lines +# showing delay at each hop. +require 'time' +class MailHopDelta + + def initialize(mail) + @head = mail.gsub(/\n\s+/,' ') + @topline = %w-Sender Recipient Time Delta- + @start_from = mail.match(/^From.*\@([^\s>]*)/)[1] + @date = Time.parse(mail.match(/^Date:\s+(.*)/)[1]) + end + + def out(line) + "%-20.20s %-20.20s %-20.20s %s" % line + end + + def hop_date(day) + day.strftime("%I:%M:%S %Y/%m/%d") + end + + def puts_hops + puts out(@topline) + puts out(['Start', @start_from, hop_date(@date),'']) + @head.split(/\n/).reverse.grep(/^Received:/).each do |hop| + hop.gsub!(/\bon (.*?) (id.*)/,'; \1') + whence = hop.match(/;\s+(.*)$/)[1] + unless whence + warn "Bad received line: #{hop}" + next + end + from = $+ if hop =~ /from\s+(\S+)|\((.*?)\)/ + by = $1 if hop =~ /by\s+(\S+\.\S+)/ + next unless now = Time.parse(whence).localtime + delta = now - @date + puts out([from, by, hop_date(now), hop_time(delta)]) + @date = now + end + end + + def hop_time(secs) + sign = secs < 0 ? 
-1 : 1 + days, secs = secs.abs.divmod(60 * 60 * 24) + hours,secs = secs.abs.divmod(60 * 60) + mins, secs = secs.abs.divmod(60) + rtn = "%3ds" % [secs * sign] + rtn << "%3dm" % [mins * sign] if mins != 0 + rtn << "%3dh" % [hours * sign] if hours != 0 + rtn << "%3dd" % [days * sign] if days != 0 + rtn + end +end + +$/ = "" +mail = MailHopDelta.new(ARGF.gets).puts_hops + + +# @@PLEAC@@_4.0 +single_level = [ "this", "that", "the", "other" ] + +# Ruby directly supports nested arrays +double_level = [ "this", "that", [ "the", "other" ] ] +still_single_level = [ "this", "that", [ "the", "other" ] ].flatten + + +# @@PLEAC@@_4.1 +a = [ "quick", "brown", "fox" ] +a = %w(Why are you teasing me?) + +lines = <<"END_OF_HERE_DOC".gsub(/^\s*(.+)/, '\1') + The boy stood on the burning deck, + It was as hot as glass. +END_OF_HERE_DOC + +bigarray = IO.readlines("mydatafile").collect { |l| l.chomp } + +name = "Gandalf" +banner = %Q(Speak, #{name}, and welcome!) + +host_info = `host #{his_host}` + +%x(ps #{$$}) + +banner = 'Costs only $4.95'.split(' ') + +rax = %w! ( ) < > { } [ ] ! + + +# @@PLEAC@@_4.2 +def commify_series(a) + a.size == 0 ? '' : + a.size == 1 ? a[0] : + a.size == 2 ? a.join(' and ') : + a[0..-2].join(', ') + ', and ' + a[-1] +end + +array = [ "red", "yellow", "green" ] + +print "I have ", array, " marbles\n" +# -> I have redyellowgreen marbles + +# But unlike Perl: +print "I have #{array} marbles\n" +# -> I have redyellowgreen marbles +# So, needs: +print "I have #{array.join(' ')} marbles\n" +# -> I have red yellow green marbles + +def commify_series(a) + sepchar = a.select { |p| p =~ /,/ } != [] ? '; ' : ', ' + a.size == 0 ? '' : + a.size == 1 ? a[0] : + a.size == 2 ? 
a.join(' and ') : + a[0..-2].join(sepchar) + sepchar + 'and ' + a[-1] +end + + +# @@PLEAC@@_4.3 +# (note: AFAIK Ruby doesn't allow gory change of Array length) +# grow the array by assigning nil to past the end of array +ary[new_size-1] = nil +# shrink the array by slicing it down +ary.slice!(new_size..-1) +# init the array with given size +Array.new(number_of_elems) +# assign to an element past the original end enlarges the array +ary[index_new_last_elem] = value + +def what_about_that_array(a) + print "The array now has ", a.size, " elements.\n" + # Index of last element is not really interesting in Ruby + print "Element #3 is `#{a[3]}'.\n" +end +people = %w(Crosby Stills Nash Young) +what_about_that_array(people) + + +# @@PLEAC@@_4.4 +# OO style +bad_users.each { |user| + complain(user) +} +# or, functional style +for user in bad_users + complain(user) +end + +for var in ENV.keys.sort + puts "#{var}=#{ENV[var]}" +end + +for user in all_users + disk_space = get_usage(user) + if (disk_space > MAX_QUOTA) + complain(user) + end +end + +for l in IO.popen("who").readlines + print l if l =~ /^gc/ +end + +# we can mimic the obfuscated Perl way +while fh.gets # $_ is set to the line just read + chomp # $_ has a trailing \n removed, if it had one + split.each { |w| # $_ is split on whitespace + # but $_ is not set to each chunk as in Perl + print w.reverse + } +end +# ...or use a cleaner way +for l in fh.readlines + l.chomp.split.each { |w| print w.reverse } +end + +# same drawback as in problem 1.4, we can't mutate a Numeric... +array.collect! { |v| v - 1 } + +a = [ .5, 3 ]; b = [ 0, 1 ] +for ary in [ a, b ] + ary.collect! { |v| v * 7 } +end +puts "#{a.join(' ')} #{b.join(' ')}" + +# we can mutate Strings, cool; we need a trick for the scalar +for ary in [ [ scalar ], array, hash.values ] + ary.each { |v| v.strip! 
} # String#strip rules :) +end + + +# @@PLEAC@@_4.5 +# not relevant in Ruby since we have always references +for item in array + # do somethingh with item +end + + +# @@PLEAC@@_4.6 +unique = list.uniq + +# generate a list of users logged in, removing duplicates +users = `who`.collect { |l| l =~ /(\w+)/; $1 }.sort.uniq +puts("users logged in: #{commify_series(users)}") # see 4.2 for commify_series + + +# @@PLEAC@@_4.7 +a - b +# [ 1, 1, 2, 2, 3, 3, 3, 4, 5 ] - [ 1, 2, 4 ] -> [3, 5] + + +# @@PLEAC@@_4.8 +union = a | b +intersection = a & b +difference = a - b + + +# @@PLEAC@@_4.9 +array1.concat(array2) +# if you will assign to another object, better use: +new_ary = array1 + array2 + +members = [ "Time", "Flies" ] +initiates = [ "An", "Arrow" ] +members += initiates + +members = [ "Time", "Flies" ] +initiates = [ "An", "Arrow" ] +members[2,0] = [ "Like", initiates ].flatten + +members[0] = "Fruit" +members[3,2] = "A", "Banana" + + +# @@PLEAC@@_4.10 +reversed = ary.reverse + +ary.reverse_each { |e| + # do something with e +} + +descending = ary.sort.reverse +descending = ary.sort { |a,b| b <=> a } + + +# @@PLEAC@@_4.11 +# remove n elements from front of ary (shift n) +front = ary.slice!(0, n) + +# remove n elements from the end of ary (pop n) +end_ = ary.slice!(-n .. -1) + +# let's extend the Array class, to make that useful +class Array + def shift2() + slice!(0 .. 1) # more symetric with pop2... + end + def pop2() + slice!(-2 .. 
-1) + end +end + +friends = %w(Peter Paul Mary Jim Tim) +this, that = friends.shift2 + +beverages = %w(Dew Jolt Cola Sprite Fresca) +pair = beverages.pop2 + + +# @@PLEAC@@_4.12 +# use Enumerable#detect (or the synonym Enumerable#find) +highest_eng = employees.detect { |emp| emp.category == 'engineer' } + + +# @@PLEAC@@_4.13 +# use Enumerable#select (or the synonym Enumerable#find_all) +bigs = nums.select { |i| i > 1_000_000 } +pigs = users.keys.select { |k| users[k] > 1e7 } + +matching = `who`.select { |u| u =~ /^gnat / } + +engineers = employees.select { |e| e.position == 'Engineer' } + +secondary_assistance = applicants.select { |a| + a.income >= 26_000 && a.income < 30_000 +} + + +# @@PLEAC@@_4.14 +# normally you would have an array of Numeric (Float or +# Fixnum or Bignum), so you would use: +sorted = unsorted.sort +# if you have strings representing Integers or Floats +# you may specify another sort method: +sorted = unsorted.sort { |a,b| a.to_f <=> b.to_f } + +# let's use the list of my own PID's +`ps ux`.split("\n")[1..-1]. + select { |i| i =~ /^#{ENV['USER']}/ }. + collect { |i| i.split[1] }. + sort { |a,b| a.to_i <=> b.to_i }.each { |i| puts i } +puts "Select a process ID to kill:" +pid = gets.chomp +raise "Exiting ... \n" unless pid && pid =~ /^\d+$/ +Process.kill('TERM', pid.to_i) +sleep 2 +Process.kill('KILL', pid.to_i) + +descending = unsorted.sort { |a,b| b.to_f <=> a.to_f } + + +# @@PLEAC@@_4.15 +ordered = unordered.sort { |a,b| compare(a,b) } + +precomputed = unordered.collect { |e| [compute, e] } +ordered_precomputed = precomputed.sort { |a,b| a[0] <=> b[0] } +ordered = ordered_precomputed.collect { |e| e[1] } + +ordered = unordered.collect { |e| [compute, e] }. + sort { |a,b| a[0] <=> b[0] }. + collect { |e| e[1] } + +for employee in employees.sort { |a,b| a.name <=> b.name } + print employee.name, " earns \$ ", employee.salary, "\n" +end + +# Beware! `0' is true in Ruby. 
# For chaining comparisons, you may use Numeric#nonzero?, which
# returns num if num is not zero, nil otherwise
sorted = employees.sort { |a,b| (a.name <=> b.name).nonzero? || b.age <=> a.age }

users = []
# getpwent is not wrapped in Ruby... let's fallback
IO.readlines('/etc/passwd').each { |u| users << u.split(':') }
users.sort! { |a,b| a[0] <=> b[0] }
for user in users
    puts user[0]
end

sorted = names.sort { |a,b| a[1, 1] <=> b[1, 1] }
sorted = strings.sort { |a,b| a.length <=> b.length }

# let's show only the compact version
ordered = strings.collect { |e| [e.length, e] }.
    sort { |a,b| a[0] <=> b[0] }.
    collect { |e| e[1] }

ordered = strings.collect { |e| [/\d+/.match(e)[0].to_i, e] }.
    sort { |a,b| a[0] <=> b[0] }.
    collect { |e| e[1] }

# Print the passwd lines ordered by field 3, then field 2, then field 0
# (gid, uid and login name in passwd(5)).  The two ids are converted to
# integers so that e.g. 999 sorts before 1000, and the file is read with
# IO.readlines like the snippet above instead of iterating over a String.
print IO.readlines('/etc/passwd').collect { |e|
        login, uid, gid = e.split(':').values_at(0, 2, 3)
        [e, gid.to_i, uid.to_i, login]
    }.
    sort { |a,b| (a[1] <=> b[1]).nonzero? || (a[2] <=> b[2]).nonzero? || a[3] <=> b[3] }.
    collect { |e| e[0] }.join


# @@PLEAC@@_4.16
circular.unshift(circular.pop)        # the last shall be first
circular.push(circular.shift)         # and vice versa

# Moves the first element of +l+ to the end of +l+ (in place) and returns
# that element.
def grab_and_rotate(l)
    l.push(ret = l.shift)
    ret
end

processes = [1, 2, 3, 4, 5]
# deliberately endless: keeps cycling through the process list
loop do
    process = grab_and_rotate(processes)
    puts "Handling process #{process}"
    sleep 1
end


# @@PLEAC@@_4.17
# Shuffles +a+ in place: walking from the last index down to 1, each
# element is swapped with one at a random index between 0 and its own.
def fisher_yates_shuffle(a)
    (a.size-1).downto(1) { |i|
        j = rand(i+1)
        a[i], a[j] = a[j], a[i] if i != j
    }
end

# Shuffles +a+ in place by swapping every position with a random position
# taken from the whole array (this simple scheme does not give a uniform
# distribution of permutations).
def naive_shuffle(a)
    for i in 0...a.size
        j = rand(a.size)
        a[i], a[j] = a[j], a[i]
    end
end


# @@PLEAC@@_4.18
#!/usr/bin/env ruby
# example 4-2 words
# words - gather lines, present in columns

# class to encapsulate the word formatting from the input
class WordFormatter
    # cols: total number of characters available per output line
    def initialize(cols)
        @cols = cols
    end

    # helper to return the length of the longest word in the wordlist
    # (never less than 1, also for an empty list)
    def maxlen(wordlist)
        wordlist.collect { |word| word.length }.push(1).max
    end

    # process the wordlist and print it formatted into columns;
    # the words run down the columns, not across the rows
    def output(wordlist)
        collen = maxlen(wordlist) + 1
        columns = @cols / collen
        columns = 1 if columns == 0
        rows = (wordlist.length + columns - 1) / columns
        # now process each item, picking out proper piece for this position
        0.upto(rows * columns - 1) { |item|
            target = (item % columns) * rows + (item / columns)
            eol = ((item+1) % columns == 0)
            piece = wordlist[target] || ""   # positions past the last word stay empty
            piece = piece.ljust(collen) unless eol
            print piece
            puts if eol
        }
        # no need to finish it up, because eol is always true for the last element
    end
end

# get nr of chars that fit in window or console, see PLEAC 15.4
# not portable -- linux only (?)
# Returns 80 when the ioctl fails or reports a width of 0.
def getWinCharWidth()
    buf = "\0" * 8
    # 0x5413 is TIOCGWINSZ on Linux; it fills buf with four unsigned shorts
    # (rows, columns, x pixels, y pixels)
    $stdout.ioctl(0x5413, buf)
    ws_col = buf.unpack("S4")[1]
    ws_col > 0 ? ws_col : 80
rescue
    80
end

# main program
cols = getWinCharWidth()
formatter = WordFormatter.new(cols)
words = readlines()
words.collect! { |line|
    line.chomp
}
formatter.output(words)


# @@PLEAC@@_4.19
# In ruby, Fixnum's are automatically converted to Bignum's when
# needed, so there is no need for an extra module
# Returns n! (1 for n <= 0).
def factorial(n)
    s = 1
    while n > 0
        s *= n
        n -= 1
    end
    s
end

puts factorial(500)

#---------------------------------------------------------
# Example 4-3. tsc-permute
# tsc_permute: permute each word of input
# Prints every ordering of +items+, one per line, each followed by the
# words already collected in +perms+.  Neither argument is modified.
def permute(items, perms)
    if items.empty?
        puts perms.join(" ")
    else
        items.each_index do |idx|
            newitems = items.dup
            newperms = perms.dup
            # remove by position: Array#delete would drop every equal word
            newperms.unshift(newitems.delete_at(idx))
            permute(newitems, newperms)
        end
    end
end
# In ruby the main program must be after all definitions it is using
permute(ARGV, [])

#---------------------------------------------------------
# mjd_permute: permute each word of input

# Returns n! (1 for n <= 0).
def factorial(n)
    s = 1
    while n > 0
        s *= n
        n -= 1
    end
    s
end

# we use a class with a class variable store the private cache
# for the results of the factorial function.
# Factorial.compute(n) returns n! and remembers every result it has computed.
class Factorial
    # @@fact[n] holds n! once it is known; 0! seeds the cache
    @@fact = [ 1 ]

    # Raises ArgumentError for a negative n: a negative index would otherwise
    # read the cache from its end and return an unrelated value.
    def Factorial.compute(n)
        raise ArgumentError, "factorial is not defined for #{n}" if n < 0
        @@fact[n] ||= n * Factorial.compute(n - 1)
    end
end

#---------------------------------------------------------
# Example 4-4- mjd-permute
# n2pat(n, len): produce the N-th pattern of length len

# We must use a lower case letter as parameter N, otherwise it is
# handled as a constant.  `length` is the length of the resulting
# array, not the index of the last element (length - 1) like in
# the perl example.
def n2pat(n, length)
    pat = []
    i = 1
    while i <= length
        pat.push(n % i)     # the i-th entry lies in 0 .. i-1
        n /= i
        i += 1
    end
    pat
end

# pat2perm(pat): turn pattern returned by n2pat() into
# permutation of integers.  Consumes +pat+ (it is empty afterwards).
def pat2perm(pat)
    source = (0 .. pat.length - 1).to_a
    perm = []
    # take the pattern entries from the back; each one picks (and removes)
    # an element of the shrinking source list
    perm.push(source.slice!(pat.pop)) while pat.length > 0
    perm
end

# Returns the n-th permutation of the integers 0 .. len-1.
def n2perm(n, len)
    pat2perm(n2pat(n,len))
end

# In ruby the main program must be after all definitions
while line = gets
    data = line.split
    # the perl solution has used $#data, which is length-1
    num_permutations = Factorial.compute(data.length())
    0.upto(num_permutations - 1) do |i|
        # in ruby we can not use an array as selector for an array
        # but by exchanging the two arrays, we can use the collect method
        # which returns an array with the result of all block invocations
        permutation = n2perm(i, data.length).collect {
            |j| data[j]
        }
        puts permutation.join(" ")
    end
end


# @@PLEAC@@_5.0
age = { "Nat" => 24,
        "Jules" => 25,
        "Josh" => 17 }

age["Nat"] = 24
age["Jules"] = 25
age["Josh"] = 17

food_color = {
    "Apple" => "red",
    "Banana" => "yellow",
    "Lemon" => "yellow",
    "Carrot" => "orange"
             }

# In Ruby, you cannot avoid the double or simple quoting
# while manipulating hashes


# @@PLEAC@@_5.1
hash[key] = value

food_color["Raspberry"] = "pink"
puts "Known foods:", food_color.keys


# @@PLEAC@@_5.2
# does hash have a value for key ?
+if (hash.has_key?(key)) + # it exists +else + # it doesn't +end + +[ "Banana", "Martini" ].each { |name| + print name, " is a ", food_color.has_key?(name) ? "food" : "drink", "\n" +} + +age = {} +age['Toddler'] = 3 +age['Unborn'] = 0 +age['Phantasm'] = nil + +for thing in ['Toddler', 'Unborn', 'Phantasm', 'Relic'] + print "#{thing}: " + print "Has-key " if age.has_key?(thing) + print "True " if age[thing] + print "Nonzero " if age[thing] && age[thing].nonzero? + print "\n" +end + +#=> +# Toddler: Has-key True Nonzero +# Unborn: Has-key True +# Phantasm: Has-key +# Relic: + +# You use Hash#has_key? when you use Perl's exists -> it checks +# for existence of a key in a hash. +# All Numeric are "True" in ruby, so the test doesn't have the +# same semantics as in Perl; you would use Numeric#nonzero? to +# achieve the same semantics (false if 0, true otherwise). + + +# @@PLEAC@@_5.3 +food_color.delete("Banana") + + +# @@PLEAC@@_5.4 +hash.each { |key, value| + # do something with key and value +} + +hash.each_key { |key| + # do something with key +} + +food_color.each { |food, color| + puts "#{food} is #{color}" +} + +food_color.each_key { |food| + puts "#{food} is #{food_color[food]}" +} + +# IMO this demonstrates that OO style is by far more readable +food_color.keys.sort.each { |food| + puts "#{food} is #{food_color[food]}." 
+} + +#----------------------------- +#!/usr/bin/ruby +# countfrom - count number of messages from each sender + +# Default value is 0 +from = Hash.new(0) +while gets + /^From: (.*)/ and from[$1] += 1 +end + +# More useful to sort by number of received mail by person +from.sort {|a,b| b[1]<=>a[1]}.each { |v| + puts "#{v[1]}: #{v[0]}" +} +#----------------------------- + + +# @@PLEAC@@_5.5 +# You may use the built-in 'inspect' method this way: +p hash + +# Or do it the Cookbook way: +hash.each { |k,v| puts "#{k} => #{v}" } + +# Sorted by keys +hash.sort.each { |e| puts "#{e[0]} => #{e[1]}" } +# Sorted by values +hash.sort{|a,b| a[1]<=>b[1]}.each { |e| puts "#{e[0]} => #{e[1]}" } + + +# @@PLEAC@@_5.7 +ttys = Hash.new +for i in `who` + user, tty = i.split + (ttys[user] ||= []) << tty # see problems_ruby for more infos +end +ttys.keys.sort.each { |k| + puts "#{k}: #{commify_series(ttys[k])}" # from 4.2 +} + + +# @@PLEAC@@_5.8 +surname = { "Mickey" => "Mantle", "Babe" => "Ruth" } +puts surname.index("Mantle") + +# If you really needed to 'invert' the whole hash, use Hash#invert + +#----------------------------- +#!/usr/bin/ruby -w +# foodfind - find match for food or color + +given = ARGV.shift or raise "usage: foodfind food_or_color" + +color = { + "Apple" => "red", + "Banana" => "yellow", + "Lemon" => "yellow", + "Carrot" => "orange", +} + +if (color.has_key?(given)) + puts "#{given} is a food with color #{color[given]}." +end +if (color.has_value?(given)) + puts "#{color.index(given)} is a food with color #{given}." +end +#----------------------------- + + +# @@PLEAC@@_5.9 +# Sorted by keys (Hash#sort gives an Array of pairs made of each key,value) +food_color.sort.each { |f| + puts "#{f[0]} is #{f[1]}." +} + +# Sorted by values +food_color.sort { |a,b| a[1] <=> b[1] }.each { |f| + puts "#{f[0]} is #{f[1]}." +} + +# Sorted by length of values +food_color.sort { |a,b| a[1].length <=> b[1].length }.each { |f| + puts "#{f[0]} is #{f[1]}." 
+} + + +# @@PLEAC@@_5.10 +merged = a.clone.update(b) # because Hash#update changes object in place + +drink_color = { "Galliano" => "yellow", "Mai Tai" => "blue" } +ingested_color = drink_color.clone.update(food_color) + +substance_color = {} +for i in [ food_color, drink_color ] + i.each_key { |k| + if substance_color.has_key?(k) + puts "Warning: #{k} seen twice. Using the first definition." + next + end + substance_color[k] = 1 + } +end + + +# @@PLEAC@@_5.11 +common = hash1.keys & hash2.keys + +this_not_that = hash1.keys - hash2.keys + + +# @@PLEAC@@_5.12 +# no problem here, Ruby handles any kind of object for key-ing +# (it takes Object#hash, which defaults to Object#id) + + +# @@PLEAC@@_5.13 +# AFAIK, not possible in Ruby + + +# @@PLEAC@@_5.14 +# Be careful, the following is possible only because Fixnum objects are +# special (documentation says: there is effectively only one Fixnum object +# instance for any given integer value). +count = Hash.new(0) +array.each { |e| + count[e] += 1 +} + + +# @@PLEAC@@_5.15 +father = { + "Cain" , "Adam", + "Abel" , "Adam", + "Seth" , "Adam", + "Enoch" , "Cain", + "Irad" , "Enoch", + "Mehujael" , "Irad", + "Methusael" , "Mehujael", + "Lamech" , "Methusael", + "Jabal" , "Lamech", + "Jubal" , "Lamech", + "Tubalcain" , "Lamech", + "Enos" , "Seth", +} + +while gets + chomp + begin + print $_, " " + end while $_ = father[$_] + puts +end + +children = {} +father.each { |k,v| + (children[v] ||= []) << k +} +while gets + chomp + puts "#{$_} begat #{(children[$_] || ['Nobody']).join(', ')}.\n" +end + +includes = {} +files.each { |f| + begin + for l in IO.readlines(f) + next unless l =~ /^\s*#\s*include\s*<([^>]+)>/ + (includes[$1] ||= []) << f + end + rescue SystemCallError + $stderr.puts "#$! 
(skipping)" + end +} + +include_free = includes.values.flatten.uniq - includes.keys + + +# @@PLEAC@@_5.16 +# dutree - print sorted intented rendition of du output +#% dutree +#% dutree /usr +#% dutree -a +#% dutree -a /bin + +# The DuNode class collects all information about a directory, +# and provides some convenience methods +class DuNode + + attr_reader :name + attr_accessor :size + attr_accessor :kids + + def initialize(name) + @name = name + @kids = [] + @size = 0 + end + + # support for sorting nodes with side + def size_compare(node2) + @size <=> node2.size + end + + def basename + @name.sub(/.*\//, "") + end + + #returns substring before last "/", nil if not there + def parent + p = @name.sub(/\/[^\/]+$/,"") + if p == @name + nil + else + p + end + end + +end + +# The DuTree does the acdtual work of +# getting the input, parsing it, builging up a tree +# and format it for output +class Dutree + + attr_reader :topdir + + def initialize + @nodes = Hash.new + @dirsizes = Hash.new(0) + @kids = Hash.new([]) + end + + # get a node by name, create it if it does not exist yet + def get_create_node(name) + if @nodes.has_key?(name) + @nodes[name] + else + node = DuNode.new(name) + @nodes[name] = node + node + end + end + + # run du, read in input, save sizes and kids + # stores last directory read in instance variable topdir + def input(arguments) + name = "" + cmd = "du " + arguments.join(" ") + IO.popen(cmd) { |pipe| + pipe.each { |line| + size, name = line.chomp.split(/\s+/, 2) + node = get_create_node(name) + node.size = size.to_i + @nodes[name] = node + parent = node.parent + if parent + get_create_node(parent).kids.push(node) + end + } + } + @topdir = @nodes[name] + end + + # figure out how much is taken in each directory + # that isn't stored in the subdirectories. Add a new + # fake kid called "." containing that much. 
+ def get_dots(node) + cursize = node.size + for kid in node.kids + cursize -= kid.size + get_dots(kid) + end + if node.size != cursize + newnode = get_create_node(node.name + "/.") + newnode.size = cursize + node.kids.push(newnode) + end + end + + # recursively output everything + # passing padding and number width as well + # on recursive calls + def output(node, prefix="", width=0) + line = sprintf("%#{width}d %s", node.size, node.basename) + puts(prefix + line) + prefix += line.sub(/\d /, "| ") + prefix.gsub!(/[^|]/, " ") + if node.kids.length > 0 # not a bachelor node + kids = node.kids + kids.sort! { |a,b| + b.size_compare(a) + } + width = kids[0].size.to_s.length + for kid in kids + output(kid, prefix, width) + end + end + end + +end + +tree = Dutree.new +tree.input(ARGV) +tree.get_dots(tree.topdir) +tree.output(tree.topdir) + + +# @@PLEAC@@_6.0 +# The verbose version are match, sub, gsub, sub! and gsub!; +# pattern needs to be a Regexp object; it yields a MatchData +# object. +pattern.match(string) +string.sub(pattern, replacement) +string.gsub(pattern, replacement) +# As usual in Ruby, sub! does the same as sub but also modifies +# the object, the same for gsub!/gsub. + +# Sugared syntax yields the position of the match (or nil if no +# match). Note that the object at the right of the operator needs +# not to be a Regexp object (it can be a String). The "dont +# match" operator yields true or false. +meadow =~ /sheep/ # position of the match, nil if no match +meadow !~ /sheep/ # true if doesn't match, false if it does +# There is no sugared version for the substitution + +meadow =~ /\bovines?\b/i and print "Here be sheep!" 
+ +string = "good food" +string.sub!(/o*/, 'e') + +# % echo ababacaca | ruby -ne 'puts $& if /(a|ba|b)+(a|ac)+/' +# ababa + +# The "global" (or "multiple") match is handled by String#scan +scan (/(\d+)/) { + puts "Found number #{$1}" +} + +# String#scan yields an Array if not used with a block +numbers = scan(/\d+/) + +digits = "123456789" +nonlap = digits.scan(/(\d\d\d)/) +yeslap = digits.scan(/(?=(\d\d\d))/) +puts "Non-overlapping: #{nonlap.join(' ')}" +puts "Overlapping: #{yeslap.join(' ')}"; +# Non-overlapping: 123 456 789 +# Overlapping: 123 234 345 456 567 678 789 + +string = "And little lambs eat ivy" +string =~ /l[^s]*s/ +puts "(#$`) (#$&) (#$')" +# (And ) (little lambs) ( eat ivy) + + +# @@PLEAC@@_6.1 +# Ruby doesn't have the same problem: +dst = src.sub('this', 'that') + +progname = $0.sub('^.*/', '') + +bindirs = %w(/usr/bin /bin /usr/local/bin) +libdirs = bindirs.map { |l| l.sub('bin', 'lib') } + + +# @@PLEAC@@_6.3 +/\S+/ # as many non-whitespace bytes as possible +/[A-Za-z'-]+/ # as many letters, apostrophes, and hyphens + +/\b([A-Za-z]+)\b/ # usually best +/\s([A-Za-z]+)\s/ # fails at ends or w/ punctuation + + +# @@PLEAC@@_6.4 +require 'socket' +str = 'www.ruby-lang.org and www.rubygarden.org' +re = / + ( # capture the hostname in $1 + (?: # these parens for grouping only + (?! [-_] ) # lookahead for neither underscore nor dash + [\w-] + # hostname component + \. # and the domain dot + ) + # now repeat that whole thing a bunch of times + [A-Za-z] # next must be a letter + [\w-] + # now trailing domain part + ) # end of $1 capture + /x # /x for nice formatting + +str.gsub! re do # pass a block to execute replacement + host = TCPsocket.gethostbyname($1) + "#{$1} [#{host[3]}]" +end + +puts str +#----------------------------- +# to match whitespace or #-characters in an extended re you need to escape +# them. + +foo = 42 +str = 'blah #foo# blah' +str.gsub! 
%r/ # replace + \# # a pound sign + (\w+) # the variable name + \# # another pound sign + /x do + eval $1 # with the value of a local variable + end +puts str # => blah 42 blah + + +# @@PLEAC@@_6.5 +# The 'g' modifier doesn't exist in Ruby, a regexp can't be used +# directly in a while loop; instead, use String#scan { |match| .. } +fish = 'One fish two fish red fish blue fish' +WANT = 3 +count = 0 +fish.scan(/(\w+)\s+fish\b/i) { + if (count += 1) == WANT + puts "The third fish is a #{$1} one." + end +} + +if fish =~ /(?:\w+\s+fish\s+){2}(\w+)\s+fish/i + puts "The third fish is a #{$1} one." +end + +pond = 'One fish two fish red fish blue fish' +# String#scan without a block gives an array of matches, each match +# being an array of all the specified groups +colors = pond.scan(/(\w+)\s+fish\b/i).flatten # get all matches +color = colors[2] # then the one we want +# or without a temporary array +color = pond.scan(/(\w+)\s+fish\b/i).flatten[2] # just grab element 3 +puts "The third fish in the pond is #{color}." + +count = 0 +fishes = 'One fish two fish red fish blue fish' +evens = fishes.scan(/(\w+)\s+fish\b/i).select { (count+=1) % 2 == 0 } +print "Even numbered fish are #{evens.join(' ')}." + +count = 0 +fishes.gsub(/ + \b # makes next \w more efficient + ( \w+ ) # this is what we\'ll be changing + ( + \s+ fish \b + ) + /x) { + if (count += 1) == 4 + 'sushi' + $2 + else + $1 + $2 + end +} + +pond = 'One fish two fish red fish blue fish swim here.' +puts "Last fish is #{pond.scan(/\b(\w+)\s+fish\b/i).flatten[-1]}" + +/ + A # find some pattern A + (?! # mustn\'t be able to find + .* # something + A # and A + ) + $ # through the end of the string +/x + +# The "s" perl modifier is "m" in Ruby (not very nice since there is +# also an "m" in perl..) +pond = "One fish two fish red fish blue fish swim here." +if (pond =~ / + \b ( \w+) \s+ fish \b + (?! .* \b fish \b ) + /mix) + puts "Last fish is #{$1}." +else + puts "Failed!" 
+end + + +# @@PLEAC@@_6.6 +#----------------------------- +#!/usr/bin/ruby -w +# killtags - very bad html killer +$/ = nil; # each read is whole file +while file = gets() do + file.gsub!(/<.*?>/m,''); # strip tags (terribly) + puts file # print file to STDOUT +end +#----------------------------- +#!/usr/bin/ruby -w +#headerfy - change certain chapter headers to html +$/ = '' +while file = gets() do + pattern = / + \A # start of record + ( # capture in $1 + Chapter # text string + \s+ # mandatory whitespace + \d+ # decimal number + \s* # optional whitespace + : # a real colon + . * # anything not a newline till end of line + ) + /x + puts file.gsub(pattern,'<H1>\1</H1>') +end +#----------------------------- +#% ruby -00pe "gsub!(/\A(Chapter\s+\d+\s*:.*)/,'<H1>\1</H1>')" datafile + +#!/usr/bin/ruby -w +#----------------------------- +for file in ARGV + file = File.open(ARGV.shift) + while file.gets('') do # each read is a paragraph + print "chunk #{$.} in $ARGV has <<#{$1}>>\n" while /^START(.*?)^END/m + end # /m activates the multiline mode +end +#----------------------------- + +# @@PLEAC@@_6.7 +#----------------------------- +$/ = nil; +file = File.open("datafile") +chunks = file.gets.split(/pattern/) +#----------------------------- +# .Ch, .Se and .Ss divide chunks of STDIN +chunks = gets(nil).split(/^\.(Ch|Se|Ss)$/) +print "I read #{chunks.size} chunks.\n" +#----------------------------- + + +# @@PLEAC@@_6.8 +while gets + if ~/BEGIN/ .. ~/END/ + # line falls between BEGIN and END inclusive + end +end + +while gets + if ($. == firstnum) .. ($. == lastnum) + # operate between firstnum and lastnum line number + end +end + +# in ruby versions prior to 1.8, the above two conditional +# expressions could be shortened to: +# if /BEGIN/ .. /END/ +# and +# if firstnum .. lastnum +# but these now only work this way from the command line + +#----------------------------- + +while gets + if ~/BEGIN/ ... 
~/END/ + # line falls between BEGIN and END on different lines + end +end + +while gets + if ($. == first) ... ($. == last) + # operate between first and last line number on different lines + end +end + +#----------------------------- +# command-line to print lines 15 through 17 inclusive (see below) +ruby -ne 'print if 15 .. 17' datafile + +# print out all <XMP> .. </XMP> displays from HTML doc +while gets + print if ~%r#<XMP>#i .. ~%r#</XMP>#i; +end + +# same, but as shell command +# ruby -ne 'print if %r#<XMP>#i .. %r#</XMP>#i' document.html +#----------------------------- +# ruby -ne 'BEGIN { $top=3; $bottom=5 }; \ +# print if $top .. $bottom' /etc/passwd # FAILS +# ruby -ne 'BEGIN { $top=3; $bottom=5 }; \ +# print if $. == $top .. $. == $bottom' /etc/passwd # works +# ruby -ne 'print if 3 .. 5' /etc/passwd # also works +#----------------------------- +print if ~/begin/ .. ~/end/; +print if ~/begin/ ... ~/end/; +#----------------------------- +while gets + $in_header = $. == 1 .. ~/^$/ ? true : false + $in_body = ~/^$/ .. ARGF.eof ? true : false +end +#----------------------------- +seen = {} +ARGF.each do |line| + next unless line =~ /^From:?\s/i .. line =~ /^$/; + line.scan(%r/([^<>(),;\s]+\@[^<>(),;\s]+)/).each do |addr| + puts addr unless seen[addr] + seen[addr] ||= 1 + end +end + + +# @@PLEAC@@_6.9 +def glob2pat(globstr) + patmap = { + '*' => '.*', + '?' => '.', + '[' => '[', + ']' => ']', + } + globstr.gsub!(/(.)/) { |c| patmap[c] || Regexp::escape(c) } + '^' + globstr + '$' +end + + +# @@PLEAC@@_6.10 +# avoid interpolating patterns like this if the pattern +# isn't going to change: +pattern = ARGV.shift +ARGF.each do |line| + print line if line =~ /#{pattern}/ +end + +# the above creates a new regex each iteration. 
# Instead, use the /o modifier so the interpolated regex is compiled only once.

pattern = ARGV.shift
ARGF.each do |line|
  print line if line =~ /#{pattern}/o
end

#-----------------------------

#!/usr/bin/ruby
# popgrep1 - grep for abbreviations of places that say "pop"
# version 1: slow but obvious way (a regex is rebuilt for every
# line/state combination)
popstates = %w(CO ON MI WI MN)
ARGF.each do |line|
  popstates.each do |state|
    if line =~ /\b#{state}\b/
      print line
      break # stop at the first matching state; Perl's `last` is `break` in Ruby
    end
  end
end

#-----------------------------
#!/usr/bin/ruby
# popgrep2 - grep for abbreviations of places that say "pop"
# version 2: eval strings; fast but hard to quote
# The evaluated source is assembled from the fixed list below only;
# never feed external input into a string that is passed to eval.
popstates = %w(CO ON MI WI MN)
checks = popstates.map do |state|
  "\tif line =~ /\\b#{state}\\b/; print(line); next; end\n"
end
code = "ARGF.each do |line|\n#{checks.join}end\n"
print "CODE IS\n---\n#{code}\n---\n" if false # turn on for debugging
eval code

# CODE IS
# ---
# ARGF.each do |line|
#         if line =~ /\bCO\b/; print(line); next; end
#         if line =~ /\bON\b/; print(line); next; end
#         if line =~ /\bMI\b/; print(line); next; end
#         if line =~ /\bWI\b/; print(line); next; end
#         if line =~ /\bMN\b/; print(line); next; end
# end
#
# ---

## alternatively, the same idea as above but compiling
## to a case statement: (not in perlcookbook)
#!/usr/bin/ruby -w
# popgrep2.5 - grep for abbreviations of places that say "pop"
# version 2.5: eval strings; fast but hard to quote
popstates = %w(CO ON MI WI MN)
whens = popstates.map do |state|
  "  when /\\b#{state}\\b/ then print line\n"
end
code = "ARGF.each do |line|\n  case line\n#{whens.join}  end\nend\n"
print "CODE IS\n---\n#{code}\n---\n" if false # turn on for debugging
eval code

# CODE IS
# ---
# ARGF.each do |line|
#   case line
#   when /\bCO\b/ then print line
#   when /\bON\b/ then print line
#   when /\bMI\b/ then print line
#   when /\bWI\b/ then print line
#   when /\bMN\b/ then print line
#   end
# end
#
# ---

# Note: (above) Ruby 1.8 also accepted 'when EXP : EXPR' on one line, but
# that form was removed in Ruby 1.9, so 'then' is generated instead of
# the colon
separator. + +#----------------------------- +#!/usr/bin/ruby +# popgrep3 - grep for abbreviations of places that say "pop" +# version3: build a match_any function +popstates = %w(CO ON MI WI MN) +expr = popstates.map{|e|"line =~ /\\b#{e}\\b/"}.join('||') +eval "def match_any(line); #{expr};end" +ARGF.each do |line| + print line if match_any(line) +end +#----------------------------- + +## building a match_all function is a trivial +## substitution of && for || +## here is a generalized example: +#!/usr/bin/ruby -w +## grepauth - print lines that mention both foo and bar +class MultiMatch + def initialize(*patterns) + _any = build_match('||',patterns) + _all = build_match('&&',patterns) + eval "def match_any(line);#{_any};end\n" + eval "def match_all(line);#{_all};end\n" + end + def build_match(sym,args) + args.map{|e|"line =~ /#{e}/"}.join(sym) + end +end + +mm = MultiMatch.new('foo','bar') +ARGF.each do |line| + print line if mm.match_all(line) +end +#----------------------------- + +#!/usr/bin/ruby +# popgrep4 - grep for abbreviations of places that say "pop" +# version4: pretty fast, but simple: compile all re's first: +popstates = %w(CO ON MI WI MN) +popstates = popstates.map{|re| %r/\b#{re}\b/} +ARGF.each do |line| + popstates.each do |state_re| + if line =~ state_re + print line + break + end + end +end + +## speeds trials on the jargon file(412): 26006 lines, 1.3MB +## popgrep1 => 7.040s +## popgrep2 => 0.656s +## popgrep2.5 => 0.633s +## popgrep3 => 0.675s +## popgrep4 => 1.027s + +# unless speed is criticial, the technique in popgrep4 is a +# reasonable balance between speed and logical simplicity. + + +# @@PLEAC@@_6.11 +begin + print "Pattern? 
" + pat = $stdin.gets.chomp + Regexp.new(pat) +rescue + warn "Invalid Pattern" + retry +end + + +# @@PLEAC@@_6.13 +# uses the 'amatch' extension found on: +# http://raa.ruby-lang.org/project/amatch/ +require 'amatch' +matcher = Amatch.new('balast') +#$relative, $distance = 0, 1 +File.open('/usr/share/dict/words').each_line do |line| + print line if matcher.search(line) <= 1 +end +__END__ +#CODE +ballast +ballasts +balustrade +balustrades +blast +blasted +blaster +blasters +blasting +blasts + + +# @@PLEAC@@_6.14 +str.scan(/\G(\d)/).each do |token| + puts "found #{token}" +end +#----------------------------- +n = " 49 here" +n.gsub!(/\G /,'0') +puts n +#----------------------------- +str = "3,4,5,9,120" +str.scan(/\G,?(\d+)/).each do |num| + puts "Found number: #{num}" +end +#----------------------------- +# Ruby doesn't have the String.pos or a /c re modifier like Perl +# But it does have StringScanner in the standard library (strscn) +# which allows similar functionality: + +require 'strscan' +text = 'the year 1752 lost 10 days on the 3rd of September' +sc = StringScanner.new(text) +while sc.scan(/.*?(\d+)/) + print "found: #{sc[1]}\n" +end +if sc.scan(/\S+/) + puts "Found #{sc[0]} after last number" +end +#----------------------------- +# assuming continuing from above: +puts "The position in 'text' is: #{sc.pos}" +sc.pos = 30 +puts "The position in 'text' is: #{sc.pos}" + + +# @@PLEAC@@_6.15 +#----------------------------- +# greedy pattern +str.gsub!(/<.*>/m,'') # not good + +# non-greedy (minimal) pattern +str.gsub!(/<.*?>/m,'') # not great + + +#----------------------------- +#<b><i>this</i> and <i>that</i> are important</b> Oh, <b><i>me too!</i></b> +#----------------------------- +%r{ <b><i>(.*?)</i></b> }mx +#----------------------------- +%r/BEGIN((?:(?!BEGIN).)*)END/ +#----------------------------- +%r{ <b><i>( (?: (?!</b>|</i>). )* ) </i></b> }mx +#----------------------------- +%r{ <b><i>( (?: (?!</[ib]>). 
)* ) </i></b> }mx +#----------------------------- +%r{ + <b><i> + [^<]* # stuff not possibly bad, and not possibly the end. + (?: + # at this point, we can have '<' if not part of something bad + (?! </?[ib]> ) # what we can't have + < # okay, so match the '<' + [^<]* # and continue with more safe stuff + ) * + </i></b> + }mx + + +# @@PLEAC@@_6.16 +#----------------------------- +$/ = "" +ARGF.each do |para| + para.scan %r/ + \b # start at word boundary + (\S+) # find chunk of non-whitespace + \b # until a word boundary + ( + \s+ # followed by whitespace + \1 # and that same chunk again + \b # and a word boundary + ) + # one or more times + /xi do + puts "dup word '#{$1}' at paragraph #{$.}" + end +end +#----------------------------- +astr = 'nobody' +bstr = 'bodysnatcher' +if "#{astr} #{bstr}" =~ /^(\w+)(\w+) \2(\w+)$/ + print "#{$2} overlaps in #{$1}-#{$2}-#{$3}" +end +#----------------------------- +#!/usr/bin/ruby -w +# prime_pattern -- find prime factors of argument using patterns +ARGV << 180 +cap = 'o' * ARGV.shift +while cap =~ /^(oo+?)\1+$/ + print $1.size, " " + cap.gsub!(/#{$1}/,'o') +end +puts cap.size +#----------------------------- +#diophantine +# solve for 12x + 15y + 16z = 281, maximizing x +if ('o' * 281).match(/^(o*)\1{11}(o*)\2{14}(o*)\3{15}$/) + x, y, z = $1.size, $2.size, $3.size + puts "One solution is: x=#{x}; y=#{y}; z=#{z}" +else + puts "No solution." 
+end +# => One solution is: x=17; y=3; z=2 + +#----------------------------- +# using different quantifiers: +('o' * 281).match(/^(o+)\1{11}(o+)\2{14}(o+)\3{15}$/) +# => One solution is: x=17; y=3; z=2 + +('o' * 281).match(/^(o*?)\1{11}(o*)\2{14}(o*)\3{15}$/) +# => One solution is: x=0; y=7; z=11 + +('o' * 281).match(/^(o+?)\1{11}(o*)\2{14}(o*)\3{15}$/) +# => One solution is: x=1; y=3; z=14 + + +# @@PLEAC@@_6.17 +# alpha OR beta +%r/alpha|beta/ + +# alpha AND beta +%r/(?=.*alpha)(?=.*beta)/m + +# alpha AND beta, no overlap +%r/alpha.*beta|beta.*alpha/m + +# NOT beta +%r/^(?:(?!beta).)*$/m + +# NOT bad BUT good +%r/(?=(?:(?!BAD).)*$)GOOD/m +#----------------------------- + +if !(string =~ /pattern/) # ugly + something() +end + +if string !~ /pattern/ # preferred + something() +end + + +#----------------------------- +if string =~ /pat1/ && string =~ /pat2/ + something() +end +#----------------------------- +if string =~ /pat1/ || string =~ /pat2/ + something() +end +#----------------------------- +#!/usr/bin/ruby -w +# minigrep - trivial grep +pat = ARGV.shift +ARGF.each do |line| + print line if line =~ /#{pat}/o +end +#----------------------------- + "labelled" =~ /^(?=.*bell)(?=.*lab)/m +#----------------------------- +$string =~ /bell/ && $string =~ /lab/ +#----------------------------- +$murray_hill = "blah bell blah " +if $murray_hill =~ %r{ + ^ # start of string + (?= # zero-width lookahead + .* # any amount of intervening stuff + bell # the desired bell string + ) # rewind, since we were only looking + (?= # and do the same thing + .* # any amount of intervening stuff + lab # and the lab part + ) + }mx # /m means . can match newline + + print "Looks like Bell Labs might be in Murray Hill!\n"; +end +#----------------------------- +"labelled" =~ /(?:^.*bell.*lab)|(?:^.*lab.*bell)/ +#----------------------------- +$brand = "labelled"; +if $brand =~ %r{ + (?: # non-capturing grouper + ^ .*? # any amount of stuff at the front + bell # look for a bell + .*? 
# followed by any amount of anything + lab # look for a lab + ) # end grouper + | # otherwise, try the other direction + (?: # non-capturing grouper + ^ .*? # any amount of stuff at the front + lab # look for a lab + .*? # followed by any amount of anything + bell # followed by a bell + ) # end grouper + }mx # /m means . can match newline + print "Our brand has bell and lab separate.\n"; +end +#----------------------------- +$map =~ /^(?:(?!waldo).)*$/s +#----------------------------- +$map = "the great baldo" +if $map =~ %r{ + ^ # start of string + (?: # non-capturing grouper + (?! # look ahead negation + waldo # is he ahead of us now? + ) # is so, the negation failed + . # any character (cuzza /s) + ) * # repeat that grouping 0 or more + $ # through the end of the string + }mx # /m means . can match newline + print "There's no waldo here!\n"; +end +=begin + 7:15am up 206 days, 13:30, 4 users, load average: 1.04, 1.07, 1.04 + +USER TTY FROM LOGIN@ IDLE JCPU PCPU WHAT + +tchrist tty1 5:16pm 36days 24:43 0.03s xinit + +tchrist tty2 5:19pm 6days 0.43s 0.43s -tcsh + +tchrist ttyp0 chthon 7:58am 3days 23.44s 0.44s -tcsh + +gnat ttyS4 coprolith 2:01pm 13:36m 0.30s 0.30s -tcsh +=end +#% w | minigrep '^(?!.*ttyp).*tchrist' +#----------------------------- +%r{ + ^ # anchored to the start + (?! # zero-width look-ahead assertion + .* # any amount of anything (faster than .*?) + ttyp # the string you don't want to find + ) # end look-ahead negation; rewind to start + .* # any amount of anything (faster than .*?) 
+ tchrist # now try to find Tom +}x +#----------------------------- +#% w | grep tchrist | grep -v ttyp +#----------------------------- +#% grep -i 'pattern' files +#% minigrep '(?i)pattern' files +#----------------------------- + + +# @@PLEAC@@_6.20 +ans = $stdin.gets.chomp +re = %r/^#{Regexp.quote(ans)}/ +case + when "SEND" =~ re : puts "Action is send" + when "STOP" =~ re : puts "Action is stop" + when "ABORT" =~ re : puts "Action is abort" + when "EDIT" =~ re : puts "Action is edit" +end +#----------------------------- +require 'abbrev' +table = Abbrev.abbrev %w-send stop abort edit- +loop do + print "Action: " + ans = $stdin.gets.chomp + puts "Action for #{ans} is #{table[ans.downcase]}" +end + + +#----------------------------- +# dummy values are defined for 'file', 'PAGER', and +# the 'invoke_editor' and 'deliver_message' methods +# do not do anything interesting in this example. +#!/usr/bin/ruby -w +require 'abbrev' + +file = 'pleac_ruby.data' +PAGER = 'less' + +def invoke_editor + puts "invoking editor" +end + +def deliver_message + puts "delivering message" +end + +actions = { + 'edit' => self.method(:invoke_editor), + 'send' => self.method(:deliver_message), + 'list' => proc {system(PAGER, file)}, + 'abort' => proc {puts "See ya!"; exit}, + "" => proc {puts "Unknown Command"} +} + +dtable = Abbrev.abbrev(actions.keys) +loop do + print "Action: " + ans = $stdin.gets.chomp.delete(" \t") + actions[ dtable[ans.downcase] || "" ].call +end + + +# @@PLEAC@@_6.19 +#----------------------------- +# basically, the Perl Cookbook categorizes this as an +# unsolvable problem ... +#----------------------------- +1 while addr.gsub!(/\([^()]*\)/,'') +#----------------------------- +Dear someuser@host.com, + +Please confirm the mail address you gave us Wed May 6 09:38:41 +MDT 1998 by replying to this message. Include the string +"Rumpelstiltskin" in that reply, but spelled in reverse; that is, +start with "Nik...". 
# Once this is done, your confirmed address will
# be entered into our records.


# @@PLEAC@@_6.21
#-----------------------------
#% gunzip -c ~/mail/archive.gz | urlify > archive.urlified
#-----------------------------
#% urlify ~/mail/*.inbox > ~/allmail.urlified
#-----------------------------
#!/usr/bin/ruby -w
# urlify - wrap HTML links around URL-like constructs

urls = '(https?|telnet|gopher|file|wais|ftp)'
ltrs = '\w'
gunk = '/#~:.?+=&%@!\-'
punc = '.:?\-'
any  = "#{ltrs}#{gunk}#{punc}"

# Built once, outside the per-line loop. Group 1 is the whole URL
# (group 2 is the scheme captured inside `urls`).
url_re = %r/
  \b              # start at word boundary
  (               # begin $1 {
    #{urls} :     # need resource and a colon
    [#{any}] +?   # followed by one or more
                  # of any valid character, but
                  # be conservative and take only
                  # what you need to....
  )               # end $1 }
  (?=             # look-ahead non-consumptive assertion
    [#{punc}]*    # either 0 or more punctuation
    [^#{any}]     # followed by a non-url char
    |             # or else
    $             # then end of the string
  )
/iox

ARGF.each do |line|
  line.gsub!(url_re) { %Q|<A HREF="#{$1}">#{$1}</A>| }
  print line
end


# @@PLEAC@@_6.23
# Roman numerals
%r/^m*(d?c{0,3}|c[dm])(l?x{0,3}|x[lc])(v?i{0,3}|i[vx])$/i
#-----------------------------
# Swap the first two words
str.sub!(/(\S+)(\s+)(\S+)/, '\3\2\1')
#-----------------------------
# keyword is $1, value is $2; the value is matched lazily so that the
# trailing \s* really strips whitespace at the end of the line
%r/(\w+)\s*=\s*(.*?)\s*$/
#-----------------------------
# Line of at least 80 characters
%r/.{80,}/
#-----------------------------
# MM/DD/YY HH:MM:SS
%r|(\d+)/(\d+)/(\d+) (\d+):(\d+):(\d+)|
#-----------------------------
# Changing directories
str.gsub!(%r|/usr/bin|, '/usr/local/bin')
#-----------------------------
# Expanding %-escapes
str.gsub!(/%([0-9A-Fa-f][0-9A-Fa-f])/) { $1.hex.chr }
#-----------------------------
# Deleting C comments (imperfectly)
str.gsub!(%r{
  /\*   # Match the opening delimiter
  .*?   # Match a minimal number of characters
  \*/   # Match the closing delimiter
}xm, '')
#-----------------------------
# Removing leading and trailing whitespace
str.sub!(/^\s+/, '')
str.sub!(/\s+$/, '')

# but really, in Ruby we'd just do:
str.strip!
#-----------------------------
# Turn the two characters backslash-n into a real newline
str.gsub!(/\\n/, "\n")
#-----------------------------
# Strip the package qualifier from a fully qualified name
str.sub!(/^.*::/, '')
#-----------------------------
# Dotted-quad IP address; `\d\d?` lets an octet have one, two or three
# digits (the previous `\d\d` rejected single-digit octets like "10.0.0.1")
%r/^([01]?\d\d?|2[0-4]\d|25[0-5])\.([01]?\d\d?|2[0-4]\d|25[0-5])\.
    ([01]?\d\d?|2[0-4]\d|25[0-5])\.([01]?\d\d?|2[0-4]\d|25[0-5])$/x
#-----------------------------
# Remove the leading path from a filename
str.sub!(%r|^.*/|, '')
#-----------------------------
# Column count from TERMCAP, always an Integer (default 80)
cols = (ENV['TERMCAP'] || ' ') =~ /:co#(\d+):/ ? $1.to_i : 80
#-----------------------------
# Program name and arguments with directory components removed; the
# arguments are joined explicitly because Array#to_s does not space-join
name = " #{$0} #{ARGV.join(' ')}".gsub(%r| /\S+/|, ' ')
#-----------------------------
require 'rbconfig'
include RbConfig # `Config` was the old alias and no longer exists
raise "This isn't Linux" unless CONFIG['target_os'] =~ /linux/i
#-----------------------------
# Join continuation lines
str.gsub!(%r/\n\s+/, ' ')
#-----------------------------
# All numbers in a string
nums = str.scan(/(\d+\.?\d*|\.\d+)/)
#-----------------------------
# All-capital words
capwords = str.scan(%r/(\b[^\Wa-z0-9_]+\b)/)
#-----------------------------
# All-lowercase words
lowords = str.scan(%r/(\b[^\WA-Z0-9_]+\b)/)
#-----------------------------
# Initial-capital words
icwords = str.scan(%r/(\b[^\Wa-z0-9_][^\WA-Z0-9_]*\b)/)
#-----------------------------
# Link targets in simple HTML
links = str.scan(%r/<A[^>]+?HREF\s*=\s*["']?([^'" >]+?)[ '"]?>/mi)
#-----------------------------
# Middle initial, if any
initial = str =~ /^\S+\s+(\S)\S*\s+\S/ ? $1 : ""
#-----------------------------
# Straight double quotes to `` and ''
str.gsub!(%r/"([^"]*)"/, %q-``\1''-)
#-----------------------------

# Extract sentences, reading the input one paragraph at a time
$/ = ""
sentences = []
ARGF.each do |para|
  para.gsub!(/\n/, ' ')
  para.gsub!(/ {3,}/, ' ')
  # scan returns one-element arrays (one per capture group); flatten so
  # that `sentences` is a flat list of strings
  sentences.concat(para.scan(/(\S.*?[!?.])(?= |\Z)/).flatten)
end

#-----------------------------
%r/(\d{4})-(\d\d)-(\d\d)/ # YYYY in $1, MM in $2, DD in $3
#-----------------------------
# North American telephone number
%r/ ^
    (?:
        1 \s (?: \d\d\d \s)?    # 1, or 1 and area code
        |                       # ... or ...
        \(\d\d\d\) \s           # area code with parens
        |                       # ... or ...
        (?: \+\d\d?\d? \s)?     # optional +country code
        \d\d\d ([\s\-])         # and area code
    )
    \d\d\d (\s|\1)              # prefix (and area code separator)
    \d\d\d\d                    # exchange
    $
/x
#-----------------------------
%r/\boh\s+my\s+gh?o(d(dess(es)?|s?)|odness|sh)\b/i
#-----------------------------
# Split `input` into lines regardless of the line-ending convention
lines = []
lines << $1 while input.sub!(/^([^\012\015]*)(\012\015?|\015\012?)/, '')


# @@PLEAC@@_7.0
# An IO object being Enumerable, we can use 'each' directly on it;
# the block form of File.open closes the file when the block returns
File.open("/usr/local/widgets/data") do |widgets|
  widgets.each { |line| puts line if line =~ /blue/ }
end

logfile = File.new("/var/log/rubylog.txt", "w")
mysub($stdin, logfile)

# The method IO#readline is similar to IO#gets
# but throws an exception when it reaches EOF
f = File.new("bla.txt")
begin
  while (line = f.readline)
    line.chomp! # plain `chomp` returns a copy and leaves `line` untouched
    $stdout.print line if line =~ /blue/
  end
rescue EOFError
  # normal termination: readline signals end of file by raising
ensure
  f.close
end

while $stdin.gets # reads from STDIN into $_
  unless $_ =~ /\d/
    $stderr.puts "No digit found." # writes to STDERR
  end
  puts "Read: #{$_}" # writes to STDOUT
end

logfile = File.new("/tmp/log", "w")

logfile.close

# $stdout (or its synonym '$>') is the destination of output
# for Kernel#print, Kernel#puts, and family functions
# (the older name $defout is obsolete)
logfile = File.new("log.txt", "w")
old = $stdout
$stdout = logfile # switch to logfile for output
puts "Countdown initiated ..."
$stdout = old # return to original output
logfile.close
puts "You have 30 seconds to reach minimum safety distance."
# @@PLEAC@@_7.1
# Mode strings and File:: flag constants are two spellings of the same
# open(2) modes; each pair below is equivalent.
source = File.new(path, "r")  # read-only handle on "path"
sink   = File.new(path, "w")  # write-only handle on "path"

source = File.open(path, File::RDONLY)  # read-only handle on "path"
sink   = File.open(path, File::WRONLY)  # write-only handle on "path"

file = File.open(path, "r+")   # read/write handle on "path"
file = File.open(path, flags)  # handle opened with the given "flags" (examples follow)

# read only
file = File.open(path, "r")
file = File.open(path, File::RDONLY)

# write only; the file is created when missing and
# truncated to zero length when present
file = File.open(path, "w")
file = File.open(path, File::WRONLY | File::TRUNC | File::CREAT)
file = File.open(path, File::WRONLY | File::TRUNC | File::CREAT, 0o666) # with permission 0666

# write only; opening fails when the file already exists
file = File.open(path, File::WRONLY | File::EXCL | File::CREAT)
file = File.open(path, File::WRONLY | File::EXCL | File::CREAT, 0o666)

# append; the file is created when missing
file = File.open(path, "a")
file = File.open(path, File::WRONLY | File::APPEND | File::CREAT)
file = File.open(path, File::WRONLY | File::APPEND | File::CREAT, 0o666)

# append, but only to a file that already exists
file = File.open(path, File::WRONLY | File::APPEND)

# read and write
file = File.open(path, "r+")
file = File.open(path, File::RDWR)

# read and write; the file is created when missing
file = File.open(path, File::RDWR | File::CREAT)
file = File.open(path, File::RDWR | File::CREAT, 0o600)

# read and write; opening fails when the file already exists
file = File.open(path, File::RDWR | File::EXCL | File::CREAT)
file = File.open(path, File::RDWR | File::EXCL | File::CREAT, 0o600)


# @@PLEAC@@_7.2
# Nothing special is needed in Ruby: File.open takes the filename
# literally (no characters carry special meaning), like Perl's sysopen
File.open(filename, 'r')


#
@@PLEAC@@_7.3 +File.expand_path('~root/tmp') +#=> "/root/tmp" +File.expand_path('~rpcuser') +#=> "/var/lib/nfs" + +# To expand ~/.. it explicitely needs the environment variable HOME +File.expand_path('~/tmp') +#=> "/home/gc/tmp" + + +# @@PLEAC@@_7.4 +# The exception raised in Ruby reports the filename +File.open('afile') + + +# @@PLEAC@@_7.5 +# Standard Ruby distribution provides the following useful extension +require 'tempfile' +# With the Tempfile class, the file is automatically deleted on garbage +# collection, so you won't need to remove it, later on. +tf = Tempfile.new('tmp') # a name is required to create the filename + +# If you need to pass the filename to an external program you can use +# File#path, but don't forget to File#flush in order to flush anything +# living in some buffer somewhere. +tf.flush +system("/usr/bin/dowhatever #{tf.path}") + +fh = Tempfile.new('tmp') +fh.sync = true # autoflushes +10.times { |i| fh.puts i } +fh.rewind +puts 'Tmp file has: ', fh.readlines + + +# @@PLEAC@@_7.6 +while (DATA.gets) do + # process the line +end +__END__ +# your data goes here +# __DATA__ doesn't exist in Ruby + +#CODE +# get info about the script (size, date of last modification) +kilosize = DATA.stat.size / 1024 +last_modif = DATA.stat.mtime +puts "<P>Script size is #{kilosize}" +puts "<P>Last script update: #{last_modif}" +__END__ +# DO NOT REMOVE THE PRECEEDING LINE. +# Everything else in this file will be ignored. +#CODE + + +# @@PLEAC@@_7.7 +while line = gets do + # do something with line. 
+end + +# or +while gets do + # do something with $_ +end + +# or more rubyish +$stdun.each do |line| + # do stuff with line +end + + +# ARGF may makes this more easy +# this is skipped if ARGV.size==0 +ARGV.each do |filename| + # closing and exception handling are done by the block + open(filename) do |fd| + fd.each do |line| + # do stuff with line + end + end rescue abort("can't open %s" % filename) +end + +# globbing is done in the Dir module +ARGV = Dir["*.[Cch]"] if ARGV.empty? + +# note: optparse is the preferred way to handle this +if (ARGV[0] == '-c') + chop_first += 1 + ARGV.shift +end + + +# processing numerical options +if ARGV[0] =~ /^-(\d+)$/ + columns = $1 + ARGV.shift +end + +# again, better to use optparse: +require 'optparse' +nostdout = 0 +append = 0 +unbuffer = 0 +ignore_ints = 0 +ARGV.options do |opt| + opt.on('-n') { nostdout +=1 } + opt.on('-a') { append +=1 } + opt.on('-u') { unbuffer +=1 } + opt.on('-i') { ignore_ints +=1 } + opt.parse! +end or abort("usage: " + __FILE__ + " [-ainu] [filenames]") + +# no need to do undef $/, we have File.read +str = File.read(ARGV[0]) + +# again we have File.read +str = File.read(ARGV[0]) + +# not sure what this should do: +# I believe open the file, print filename, lineno and line: +ARGF.each_with_index do |line, idx| + print ARGF.filename, ":", idx, ";", line +end + +# print all the lines in every file passed via command line that contains login +ARGF.each do |line| + puts line if line =~ /login/ +end +# +# even this would fit +#%ruby -ne "print if /f/" 2.log +# + +ARGF.each { |l| puts l.downcase! } + +#------------------ +#!/usr/bin/ruby -p +# just like perl's -p +$_.downcase! +# + +# I don't know who should I trust. +# perl's version splits on \w+ while python's on \w. + +chunks = 0 + +File.read(ARGV[0]).split.each do |word| + next if word =~ /^#/ + break if ["__DATA__", "__END__"].member? 
word + chunks += 1 +end + +print "Found ", chunks, " chunks\n" + + +# @@PLEAC@@_7.8 +old = File.open(old_file) +new = File.open(new_file, "w") +while old.gets do + # change $_, then... + new.print $_ +end +old.close +new.close +File.rename(old_file, "old.orig") +File.rename(new_file, old_file) + +while old.gets do + if $. == 20 then # we are at the 20th line + new.puts "Extra line 1" + new.puts "Extra line 2" + end + new.print $_ +end + +while old.gets do + next if 20..30 # skip the 20th line to the 30th + # Ruby (and Perl) permit to write if 20..30 + # instead of if (20 <= $.) and ($. <= 30) + new.print $_ +end + + +# @@PLEAC@@_7.9 +#% ruby -i.orig -pe 'FILTER COMMAND' file1 file2 file3 ... +# +#----------------------------- +##!/usr/bin/ruby -i.orig -p +# filter commands go here +#----------------------------- + +#% ruby -pi.orig -e 'gsub!(/DATE/){Time.now)' + +# effectively becomes: +ARGV << 'I' +oldfile = "" +while gets + if ARGF.filename != oldfile + newfile = ARGF.filename + File.rename(newfile, newfile + ".orig") + $stdout = File.open(newfile,'w') + oldfile = newfile + end + gsub!(/DATE/){Time.now} + print +end +$stdout = STDOUT +#----------------------------- +#% ruby -i.old -pe 'gsub!(%r{\bhisvar\b}, 'hervar')' *.[Cchy] + +#----------------------------- +# set up to iterate over the *.c files in the current directory, +# editing in place and saving the old file with a .orig extension +$-i = '.orig' # set up -i mode +ARGV.replace(Dir['*.[Cchy]']) +while gets + if $. 
== 1 + print "This line should appear at the top of each file\n" + end + gsub!(/\b(p)earl\b/i, '\1erl') # Correct typos, preserving case + print + ARGF.close if ARGF.eof +end + + +# @@PLEAC@@_7.10 +File.open('itest', 'r+') do |f| # open file for update + lines = f.readlines # read into array of lines + lines.each do |it| # modify lines + it.gsub!(/foo/, 'QQQ') + end + f.pos = 0 # back to start + f.print lines # write out modified lines + f.truncate(f.pos) # truncate to new length +end # file is automatically closed +#----------------------------- +File.open('itest', 'r+') do |f| + out = "" + f.each do |line| + out << line.gsub(/DATE/) {Time.now} + end + f.pos = 0 + f.print out + f.truncate(f.pos) +end + +# @@PLEAC@@_7.11 +File.open('infile', 'r+') do |f| + f.flock File::LOCK_EX + # update file +end +#----------------------------- +File::LOCK_SH # shared lock (for reading) +File::LOCK_EX # exclusive lock (for writing) +File::LOCK_NB # non-blocking request +File::LOCK_UN # free lock +#----------------------------- +unless f.flock File::LOCK_EX | File::LOCK_NB + warn "can't get immediate lock: blocking ..." + f.flock File::LOCK_EX +end +#----------------------------- +File.open('numfile', File::RDWR|File::CREAT) do |f| + f.flock(File::LOCK_EX) + num = f.gets.to_i || 0 + f.pos = 0 + f.truncate 0 + f.puts num + 1q +end + + +# @@PLEAC@@_7.12 +output_handle.sync = true +# Please note that like in Perl, $stderr is already unbuffered +#----------------------------- +#!/usr/bin/ruby -w +# seeme - demo stdio output buffering +$stdout.sync = ARGV.size > 0 +print "Now you don't see it..." +sleep 2 +puts "now you do" +#----------------------------- +$stderr.sync = true +afile.sync = false +#----------------------------- +# assume 'remote_con' is an interactive socket handle, +# but 'disk_file' is a handle to a regular file. 
remote_con.sync = true   # unbuffer for clarity
disk_file.sync = false   # buffered for speed
#-----------------------------
require 'socket'
sock = TCPSocket.new('www.ruby-lang.org', 80)
sock.sync = true
sock.puts "GET /en/ HTTP/1.0 \n\n"
resp = sock.read
print "DOC IS: #{resp}\n"


# @@PLEAC@@_7.13
#-----------------------------
# assumes fh1, fh2, fh3 are open IO objects
nfound = select([$stdin, fh1, fh2, fh3], nil, nil, 0)
# select returns nil when the timeout expires with nothing ready,
# so only walk the readable set when there is one
if nfound
  nfound[0].each do |file|
    case file
    when fh1
      # do something with fh1
    when fh2
      # do something with fh2
    when fh3
      # do something with fh3
    end
  end
end
#-----------------------------
input_files = []
# repeat next line for all in-files to poll
input_files << fh1
if nfound = select(input_files, nil, nil, 0)
  # input ready on files in nfound[0]
end


# @@PLEAC@@_8.0
#-----------------------------
# datafile is a file or IO object
datafile.readlines.each { |line|
  line.chomp!
  size = line.length
  puts size
}
#-----------------------------
# chomp (not chomp!) because the bang form returns nil when there is
# nothing to remove, which would make .length fail
datafile.readlines.each { |line|
  puts line.chomp.length
}
#-----------------------------
lines = datafile.readlines
#-----------------------------
whole_file = file.read
#-----------------------------
# ruby -040 -e 'word = gets; puts "First word is #{word}"'
#-----------------------------
# ruby -ne 'BEGIN { $/="%%\n" }; $_.chomp; puts $_ if( $_=~/Unix/i)' fortune.dat
#-----------------------------
handle.print "one", "two", "three"  # "onetwothree"
puts "Baa baa black sheep."         # sent to $stdout
#-----------------------------
buffer = handle.read(4096)
rv = buffer.length
#-----------------------------
handle.truncate(length)
open("/tmp#{$$}.pid", 'w') { |handle| handle.truncate(length) }
#-----------------------------
pos = datafile.pos  # tell is an alias of pos
puts "I'm #{pos} bytes from the start of datafile"
#-----------------------------
logfile.seek(0, IO::SEEK_END)
datafile.seek(pos)  # IO::SEEK_SET is the default
out.seek(-20, IO::SEEK_CUR)
#-----------------------------
written = datafile.syswrite(mystring)
raise RuntimeError unless written == mystring.length
block = infile.sysread(256)  # no equivalent to perl offset parameter in sysread
puts "only read #{block.length} bytes" if 256 != block.length
#-----------------------------
pos = handle.sysseek(0, IO::SEEK_CUR)  # don't change position


# @@PLEAC@@_8.1
while (line = fh.gets)
  line.chomp!
  nextline = nil
  # a trailing backslash means the record continues on the next line
  line.gsub!(/\\$/) { |match| nextline = fh.gets; '' }
  if (nextline != nil)
    line += nextline
    redo
  end
  # process full record in line here
end
#-----------------------------
# DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) \
#         $(TEXINFOS) $(INFOS) $(MANS) $(DATA)
# DEP_DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) \
#         $(TEXINFOS) $(INFO_DEPS) $(MANS) $(DATA) \
#         $(EXTRA_DIST)
#-----------------------------
line.gsub!(/\\\s*$/, '') {
  # as before
}


# @@PLEAC@@_8.2
#-----------------------------
count = `wc -l < #{filename}`
fail "wc failed: #{$?}" unless $?.success?
count.chomp!
#-----------------------------
count = 0
File.open(file, 'r') { |fh|
  count += 1 while fh.gets
}
# count now holds the number of lines read
#-----------------------------
count = 0
begin
  while (chunk = file.sysread(2**16))
    count += chunk.count("\n")
  end
rescue EOFError
  # sysread signals end of file by raising; count is complete here
end
#-----------------------------
count = 0
File.open(filename,'r') { |fh|
  count += 1 while fh.gets
}
# count now holds the number of lines read
#-----------------------------
# As ruby doesn't quite have an equivalent to using a for
# statement as in perl, I threw this in
count = File.readlines(filename).size
#-----------------------------
1 while file.gets
count = $.
#-----------------------------
$/ = ''  # paragraph mode: records are separated by blank lines
open(filename, 'r') { |fh|
  1 while fh.gets
  para_count = $.
} rescue fail("can't open #{filename}: #{$!}")
#-----------------------------


# ^^PLEAC^^_8.3
#-----------------------------
while (gets)
  $_.split.each { |chunk|
    # do something with chunk
  }
end
#-----------------------------
while (gets)
  $_.gsub(/(\w[\w'-]*)/) { |word|
    # do something with word
  }
end
#-----------------------------
# Make a word frequency count
# normally hashes can be created using {} or just Hash.new
# but we want the default value of an entry to be 0 instead
# of nil.
# (nil can't be incremented)
seen = Hash.new(0)
while (gets)
  $_.gsub(/(\w[\w'-]*)/) { |word|
    seen[word.downcase] += 1
  }
end
# output hash in a descending numeric sort of its values
seen.sort { |a,b| b[1] <=> a[1] }.each do |k,v|
  printf("%5d %s\n", v, k )
end

#-----------------------------
# Line frequency count
seen = Hash.new(0)
while (gets)
  seen[$_.downcase] += 1
end
seen.sort { |a,b| b[1] <=> a[1] }.each do |k,v|
  printf("%5d %s\n", v, k )
end
#-----------------------------


# @@PLEAC@@_8.4
#-----------------------------
# instead of file handle FILE, we can just
# use a string containing the filename
File.readlines(file).each { |line|
  # do something with line
}
#-----------------------------
File.readlines(file).reverse_each { |line|
  # do something with line
}
#-----------------------------
# the variable lines might have been created
# this way
# lines = File.readlines(file)
#
# normally one would use the reverse_each, but
# if you insist on using a numerical index to
# iterate over the lines array...
(lines.size - 1).downto(0) { |i|
  line = lines[i]
}
#-----------------------------
# the second readlines argument is the
# record separator $/, just like perl, a blank
# separator splits the records into paragraphs
File.readlines(file, '').each { |paragraph|
  # do something with paragraph
  puts "->Paragraph #{paragraph}"
}
#-----------------------------


# @@PLEAC@@_8.6

$/ = "%\n"
srand

# adage must exist outside the block, otherwise it is block-local and
# gone by the time it is printed
adage = nil
File.open('/usr/share/fortune/humorists') do |fortunes|
  fortunes.each do |line|
    # keep the current record with probability 1/$.
    adage = line if rand($.) < 1
  end
end

puts adage


# @@PLEAC@@_8.10
begin
  fh = File.open(file, "r+")
  addr = fh.tell unless fh.eof while fh.gets
  fh.truncate(addr)
rescue SystemCallError
  $stderr.puts "#$!"
end


# @@PLEAC@@_9.0
entry = File.stat("/usr/bin/vi")
entry = File.stat("/usr/bin")
entry = File.stat(INFILE)

entry = File.stat("/usr/bin/vi")
ctime = entry.ctime
size = entry.size

f = File.open(filename, "r")

## There is no -T equivalent in Ruby, but we can still test emptiness
# test(?s, ...) is nil for a missing or zero-length file
unless test(?s, filename)
  puts "#{filename} doesn't have text in it."
  exit
end

Dir.new("/usr/bin").each do |filename|
  puts "Inside /usr/bin is something called #{filename}"
end


# @@PLEAC@@_9.1
# File.utime takes path names; File::Stat objects only carry the times
file = "filename"
stat = File.stat(file)
readtime, writetime = stat.atime, stat.mtime
File.utime(readtime, writetime, file)

SECONDS_PER_DAY = 60 * 60 * 24
stat = File.stat(file)
atime, mtime = stat.atime, stat.mtime

atime -= 7 * SECONDS_PER_DAY
mtime -= 7 * SECONDS_PER_DAY

File.utime(atime, mtime, file)
mtime = File.stat(file).mtime
File.utime(Time.new, mtime, file)
File.utime(Time.new, File.stat("testfile").mtime, file)

#-----------------------------
#!/usr/bin/ruby -w
## uvi - vi a file without changing its access times

if ARGV.length != 1
  puts "usage: uvi filename"
  exit
end
file = ARGV[0]
atime, mtime = File.stat(file).atime, File.stat(file).mtime
system(ENV["EDITOR"] || "vi", file)
File.utime(atime, mtime, file)
#-----------------------------


# @@PLEAC@@_9.2
File.unlink(FILENAME)

err_flg = false
filenames.each do |file|
  begin
    File.unlink(file)
  rescue
    err_flg = $!
  end
end
err_flg and raise "Couldn't unlink all of #{filenames.join(" ")}: #{err_flg}"

File.unlink(file)

count = filenames.length
filenames.each do |file|
  begin
    File.unlink(file)
  rescue
    count -= 1
  end
end
if count != filenames.length
  STDERR.puts "could only delete #{count} of #{filenames.length} files"
end


# @@PLEAC@@_9.3
# FileUtils replaces the old ftools extension (File.copy / File.move)
require "fileutils"
FileUtils.cp(oldfile, newfile)

infile = File.open(oldfile, "r")
outfile = File.open(newfile, "w")

blksize = infile.stat.blksize
# This doesn't handle partial writes or ^Z
# like the Perl version does.
while (line = infile.read(blksize))
  outfile.write(line)
end

infile.close
outfile.close

system("cp #{oldfile} #{newfile}")    # unix
system("copy #{oldfile} #{newfile}")  # dos, vms

require "fileutils"
FileUtils.cp("datafile.dat", "datafile.bak")
FileUtils.mv("datafile.new", "datafile.dat")


# @@PLEAC@@_9.4
$seen = {}  # must use global var to be seen inside of method below

# Counts how often the file behind filename (identified by device and
# inode number) has been visited.
def do_my_thing(filename)
  dev, ino = File.stat(filename).dev, File.stat(filename).ino
  unless $seen[[dev, ino]]
    # do something with $filename because we haven't
    # seen it before
  end
  $seen[[dev, ino]] = $seen[[dev, ino]].to_i + 1
end

files.each do |filename|
  dev, ino = File.stat(filename).dev, File.stat(filename).ino
  if !$seen.has_key?([dev, ino])
    $seen[[dev, ino]] = []
  end
  $seen[[dev, ino]].push(filename)
end

$seen.keys.sort.each do |devino|
  dev, ino = devino  # keys were stored as [dev, ino]
  if $seen[devino].length > 1
    # $seen[devino] is a list of filenames for the same file
  end
end


# @@PLEAC@@_9.5
Dir.open(dirname) do |dir|
  dir.each do |file|
    # do something with dirname/file
    puts file
  end
end
# Dir.close is automatic

# No -T equivalent in Ruby

dir.each do |file|
  next if file =~ /^\.\.?$/
  # ...
end

# Returns the sorted paths ("dir/name") of the plain files in dir,
# skipping every entry whose name starts with a dot.
def plainfiles(dir)
  # Dir.entries opens and closes the directory itself (no leaked handle)
  Dir.entries(dir).grep(/^[^.]/).
    map { |file| "#{dir}/#{file}" }.
    select { |file| test(?f, file) }.
    sort
end


# @@PLEAC@@_9.6
list = Dir.glob("*.c")

dir = Dir.open(path)
files = dir.entries.grep(/\.c$/)
dir.close

files = Dir.glob("*.c")
files = Dir.open(path).entries.grep(/\.[ch]$/i)

dir = Dir.new(path)
files = dir.entries.grep(/\.[ch]$/i)

begin
  d = Dir.open(dir)
rescue Errno::ENOENT
  raise "Couldn't open #{dir} for reading: #{$!}"
end

files = []
d.each do |file|
  puts file
  next unless file =~ /\.[ch]$/i

  filename = "#{dir}/#{file}"
  # There is no -T equivalent in Ruby, but we can still test emptiness
  files.push(filename) if test(?s, filename)
end

dirs.entries.grep(/^\d+$/).
  map { |file| [file, "#{path}/#{file}"]} .
  select { |file| test(?d, file[1]) }.
  sort { |a,b| a[0] <=> b[0] }.
  map { |file| file[1] }


# @@PLEAC@@_9.7
require 'find'
Find.find(dirlist) do |file|
  # do whatever
end

require 'find'
argv = ARGV.empty? ? %w{.} : ARGV
Find.find(*argv) do |file|
  print file, (test(?d, file) ? "/\n" : "\n")
end

require 'find'
argv = ARGV.empty? ? %w{.} : ARGV
sum = 0
Find.find(*argv) do |file|
  size = test(?s, file) || 0
  sum += size
end
puts "#{argv.join(' ')} contains #{sum} bytes"

require 'find'
argv = ARGV.empty? ? %w{.} : ARGV
saved_size, saved_name = -1, ""
Find.find(*argv) do |file|
  size = test(?s, file) || 0
  next unless test(?f, file) && size > saved_size
  saved_size = size
  saved_name = file
end
puts "Biggest file #{saved_name} in #{argv.join(' ')} is #{saved_size}"

require 'find'
argv = ARGV.empty? ? %w{.} : ARGV
age, name = nil
Find.find(*argv) do |file|
  mtime = File.stat(file).mtime
  next if age && age > mtime
  age = mtime
  name = file
end
puts "#{name} #{age}"

#-----------------------------
#!/usr/bin/ruby -w
# fdirs - find all directories
require 'find'
argv = ARGV.empty? ? %w{.} : ARGV
Find.find(*argv) { |file| puts file if test(?d, file) }
#-----------------------------


# @@PLEAC@@_9.8
require 'fileutils'

puts "Usage #{$0} dir ..." if ARGV.empty?
ARGV.each do |dir|
  FileUtils.rmtree(dir)
end


# @@PLEAC@@_9.9
require 'fileutils'
names.each do |file|
  newname = file
  begin
    FileUtils.mv(file, newname)
  rescue Errno::EPERM
    $stderr.puts "Couldn't rename #{file} to #{newname}: #{$!}"
  end
end

require 'fileutils'
op = ARGV.empty? ? (raise "Usage: rename expr [files]\n") : ARGV.shift
argv = ARGV.empty? ? $stdin.readlines.map { |f| f.chomp } : ARGV
argv.each do |file|
  was = file
  # NOTE(review): op comes straight from the command line and is
  # evaluated as Ruby code; only run this with trusted arguments
  file = eval("file.#{op}")
  FileUtils.mv(was, file) unless was == file
end


# @@PLEAC@@_9.10
base = File.basename(path)
dir = File.dirname(path)
# ruby has no fileparse equivalent
dir, base = File.split(path)
ext = base[/\..*$/].to_s  # everything from the first dot on, "" when there is none

path = '/usr/lib/libc.a'
file = File.basename(path)
dir = File.dirname(path)

puts "dir is #{dir}, file is #{file}"
# dir is /usr/lib, file is libc.a

path = '/usr/lib/libc.a'
dir, filename = File.split(path)
name, ext = filename.split(/(?=\.)/)
puts "dir is #{dir}, name is #{name}, ext is #{ext}"
# NOTE: The Ruby code prints
# dir is /usr/lib, name is libc, extension is .a
# while the Perl code prints a '/' after the directory name
# dir is /usr/lib/, name is libc, extension is .a

# No fileparse_set_fstype() equivalent in ruby

# Returns the part of path after its first dot, or "" when path
# contains no dot.
def extension(path)
  # String#[] with a regexp yields the match or nil; Array#to_s can no
  # longer be used to join scan results
  ext = path[/\..*$/].to_s
  ext.sub(/^\./, "")
end


# @@PLEAC@@_9.11
#-----------------------------
#!/usr/bin/ruby -w
# symirror - build spectral forest of symlinks

require 'find'
require 'fileutils'

raise "usage: #{$0} realdir mirrordir" unless ARGV.size == 2

srcdir,dstdir = ARGV
srcmode = File::stat(srcdir).mode
Dir.mkdir(dstdir, srcmode & 07777) unless test(?d, dstdir)

# fix relative paths
Dir.chdir(srcdir) {srcdir = Dir.pwd}
Dir.chdir(dstdir) {dstdir = Dir.pwd}

Find.find(srcdir) do |srcfile|
  if test(?d, srcfile)
    # escape srcdir so that regexp metacharacters in the path are literal
    dest = srcfile.sub(/^#{Regexp.escape(srcdir)}/, dstdir)
    dmode = File::stat(srcfile).mode & 07777
    Dir.mkdir(dest, dmode) unless test(?d, dest)
    a = Dir["#{srcfile}/*"].reject{|f| test(?d, f)}
    FileUtils.ln_s(a, dest)
  end
end


# @@PLEAC@@_9.12
# we use the Getopt/Declare library here for convenience:
# http://raa.ruby-lang.org/project/getoptdeclare/
#-----------------------------
#!/usr/bin/ruby -w
# lst - list sorted directory contents (depth first)

require 'find'
require 'etc'
require "Getopt/Declare"

# Note: in the option-spec below there must be at least one hard
# tab in between each -option and its description. For example
# -i <tab> read from stdin

opts = Getopt::Declare.new(<<'EOPARAM')
    ============
    Input Format:
        -i	read from stdin
    ============
    Output Format:
        -l	long listing
        -r	reverse listing
    ============
    Sort on: (one of)
        -m	mtime (modify time - default)
                {$sort_criteria = :mtime}
        -u	atime (access time)
                {$sort_criteria = :atime}
        -c	ctime (inode change time)
                {$sort_criteria = :ctime}
        -s	size
                {$sort_criteria = :size}
        [mutex: -m -u -c -s]

EOPARAM

$sort_criteria ||= :mtime
files = {}
# chomp (not chomp!) so that a name without a trailing newline is kept
DIRS = opts['-i'] ? $stdin.readlines.map{|f|f.chomp} : ARGV
DIRS.each do |dir|
  Find.find(dir) do |ent|
    files[ent] = File::stat(ent)
  end
end
entries = files.keys.sort_by{|f| files[f].send($sort_criteria)}
entries = entries.reverse unless opts['-r']

entries.each do |ent|
  unless opts['-l']
    puts ent
    next
  end
  stats = files[ent]
  ftime = stats.send($sort_criteria == :size ? :mtime : $sort_criteria)
  printf "%6d %04o %6d %8s %8s %8d %s %s\n",
    stats.ino,
    stats.mode & 07777,
    stats.nlink,
    Etc.getpwuid(stats.uid).name,
    Etc.getgrgid(stats.gid).name,
    stats.size,
    ftime.strftime("%a %b %d %H:%M:%S %Y"),
    ent
end


# @@PLEAC@@_10.0
def hello
  $greeted += 1  # in Ruby, a variable beginning with $ is global (can be any type of course)
  puts "hi there!"
end

# We need to initialize $greeted before it can be used, because "+=" is waiting a Numeric object
$greeted = 0
hello  # note that appending () is optional to function calls with no parameters


# @@PLEAC@@_10.1
# In Ruby, parameters are named anyway
def hypotenuse(side1, side2)
  Math.sqrt(side1**2 + side2**2)  # the sqrt function comes from the Math module
end
diag = hypotenuse(3, 4)

puts hypotenuse(3, 4)

a = [3, 4]
print hypotenuse(*a)  # the star operator will magically convert an Array into a "tuple"

both = men + women

# In Ruby, all objects are references, so the same problem arises; we then return a new object
nums = [1.4, 3.5, 6.7]
def int_all(n)
  n.collect { |v| v.to_i }
end
ints = int_all(nums)

nums = [1.4, 3.5, 6.7]
def trunc_em(n)
  n.collect! { |v| v.to_i }  # the bang-version of collect modifies the object
end
trunc_em(nums)

# Ruby has two chomp version:
# ``chomp'' chomps the record separator and returns what's expected
# ``chomp!'' does the same but also modifies the parameter object


# @@PLEAC@@_10.2
def somefunc
  variable = something  # variable is local by default
end

name, age = ARGV
start = fetch_time

a, b = pair  # will succeed if pair is an Array object (like ARGV is)
c = fetch_time

# In ruby, run_check can't access a, b, or c until they are
# explicitly defined global (using leading $), even if they are
# both defined in the same scope

def check_x(x)
  y = "whatever"
  run_check
  if $condition
    puts "got #{x}"
  end
end

# The following will keep a reference to the array, though the
# results will be slightly different from perl: the last element
# of $global_array will be itself an array
def save_array(ary)
  ($global_array ||= []) << ary  # an unset global is nil, so create the array on first use
end

# The following gives the same results as in Perl for $global_array,
# though it doesn't illustrate anymore the way to keep a reference
# to an object: $global_array is extended with the elements of ary
def save_array(ary)
  $global_array = ($global_array || []) + ary
end


# @@PLEAC@@_10.3
# In Ruby, AFAIK a method cannot access "local variables" defined
# upper scope; mostly because everything is an object, so you'll
# do the same by defining an attribute or a static attribute

# In Ruby the BEGIN also exists:
BEGIN { puts "hello from BEGIN" }
puts "hello from main"
BEGIN { puts "hello from 2nd BEGIN" }
# gives:
#   hello from BEGIN
#   hello from 2nd BEGIN
#   hello from main

# In Ruby, it can be written as a static method and a static
# variable
class Counter
  @@counter = 0
  def Counter.next_counter; @@counter += 1; end
end

# There is no need of BEGIN since the variable will get
# initialized when parsing
class Counter
  @@counter = 42
  def Counter.next_counter; @@counter += 1; end
  def Counter.prev_counter; @@counter -= 1; end
end


# @@PLEAC@@_10.4
# You can either get the whole trace as an array of strings, each
# string telling which file, line and method is calling:
caller

# ...or only the last caller
caller[0]

# We need to extract just the method name of the backtrace:
def whoami; caller()[0] =~ /in `([^']+)'/ ? $1 : '(anonymous)'; end
def whowasi; caller()[1] =~ /in `([^']+)'/ ? $1 : '(anonymous)'; end


# @@PLEAC@@_10.5
# In Ruby, every value is a reference on an object, thus there is
# no such problem
array_diff(array1, array2)

# Returns a new array holding the element-wise sums of a1 and a2.
def add_vecpair(a1, a2)
  results = []
  a1.each_index { |i| results << (a1[i] + a2[i]) }
  results
end
a = [1, 2]
b = [5, 8]
c = add_vecpair(a, b)
p c

# Add this to the beginning of the function to check if we were
# given two arrays
a1.class == Array && a2.class == Array or
  raise "usage: add_vecpair array1 array2 (was used with: #{a1.class} #{a2.class})"


# @@PLEAC@@_10.6
# There is no return context in Ruby


# @@PLEAC@@_10.7
# Like in Perl, we need to fake with a hash, but it's dirty :-(
def thefunc(param_args)
  args = { 'INCREMENT' => '10s', 'FINISH' => '0', 'START' => 0 }
  args.update(param_args)
  if (args['INCREMENT'] =~ /m$/ )
    # .....
  end
end

thefunc({ 'INCREMENT' => '20s', 'START' => '+5m', 'FINISH' => '+30m' })
thefunc({})


# @@PLEAC@@_10.8
# there is no "undef" direct equivalent but there is the slice equiv:
a, c = func.values_at(0, 2)


# @@PLEAC@@_10.9
# Ruby has no such limitation:
def somefunc
  ary = []
  hash = {}
  # ...
  return ary, hash
end
arr, dict = somefunc

array_of_hashes = fn
h1, h2, h3 = fn


# @@PLEAC@@_10.10
return
# or (equivalent)
return nil


# @@PLEAC@@_10.11
# You can't prototype in Ruby regarding types :-(
# Though, you can force the number of arguments:
def func_with_no_arg; end
def func_with_no_arg(); end
def func_with_one_arg(a1); end
def func_with_two_args(a1, a2); end
def func_with_any_number_of_args(*args); end


# @@PLEAC@@_10.12
raise "some message"  # raise exception

begin
  val = func
rescue StandardError => msg
  $stderr.puts "func raised an exception: #{msg}"
end

# In Ruby the rescue statement uses an exception class, every
# exception which is not matched is still continuing
begin
  val = func
rescue FullMoonError
  # ...
end


# @@PLEAC@@_10.13
# Saving Global Values
# Of course we can just save the value and restore it later:
def print_age
  puts "Age is #{$age}"
end

$age = 18  # global variable
print_age()
if condition
  safeage = $age
  $age = 23
  print_age()
  $age = safeage
end

# We can also use a method that saves the global variable and
# restores it automatically when the block is left:

# Runs the given block and afterwards restores the global variable
# named by the symbol var to the value it had on entry. Returns the
# block's result.
def local(var)
  name = var.id2name
  saved = eval(name)  # a real local: locals created inside eval do not survive it
  begin
    result = yield
  ensure
    # we want to call this even if we got an exception
    eval("#{name} = saved")
  end
  result
end

condition = true
$age = 18
print_age()
if condition
  local(:$age) {
    $age = 23
    print_age()
  }
end
print_age()

# There is no need to use local() for filehandles or directory
# handles in ruby because filehandles are normal objects.


# @@PLEAC@@_10.14
# In Ruby you may redefine a method [but not overload it :-(]
# just by defining again with the same name.
def foo; puts 'foo'; end
def foo; puts 'bar'; end
foo
#=> bar

# You can also take a reference to an existing method before
# redefining a new one, using the `alias' keyword
def foo; puts 'foo'; end
alias foo_orig foo
def foo; puts 'bar'; end
foo_orig
foo
#=> foo
#=> bar

# AFAIK, there is no direct way to create a new method whose name
# comes from a variable, so use "eval"
colors = %w(red blue green yellow orange purple violet)
colors.each { |c|
  eval <<-EOS
    def #{c}(*a)
      "<FONT COLOR='#{c}'>" + a.join + "</FONT>"
    end
  EOS
}


# @@PLEAC@@_10.15
def method_missing(name, *args)
  "<FONT COLOR='#{name}'>" + args.join(' ') + "</FONT>"
end
puts chartreuse("stuff")


# @@PLEAC@@_10.16
def outer(arg)
  x = arg + 35
  inner = proc { x * 19 }
  x + inner.call()
end


# @@PLEAC@@_10.17
#!/usr/bin/ruby -w
# mailsort - sort mbox by different criteria
require 'English'
require 'date'

# Objects of class Mail represent a single mail.
# A single mail message: sequence number, subject, date and full text.
class Mail
  attr_accessor :no
  attr_accessor :subject
  attr_accessor :fulltext
  attr_accessor :date

  def initialize
    @fulltext = ""
    @subject = ""
  end

  # Appends one paragraph to the message text.
  def append(para)
    @fulltext << para
  end

  # this is called if you call puts(mail)
  def to_s
    @fulltext
  end
end

# represents a list of mails.
class Mailbox < Array

  Subjectpattern = Regexp.new('Subject:\s*(?:Re:\s*)*(.*)\n')
  Datepattern = Regexp.new('Date:\s*(.*)\n')

  # reads mails from open file and stores them
  #
  # file must respond to each(separator); it is read paragraph by paragraph.
  def read(file)
    msgno = -1
    # Declared outside the block: a variable first assigned inside the block
    # would be reset on every iteration, so paragraphs following the header
    # paragraph would never be appended to the current mail.
    mail = nil
    file.each('') { |para| # '' selects paragraph reads
      if para =~ /^From/
        mail = Mail.new
        mail.no = (msgno += 1)
        md = Subjectpattern.match(para)
        mail.subject = md[1] if md
        md = Datepattern.match(para)
        mail.date = md ? DateTime.parse(md[1]) : DateTime.now
        push(mail)
      end
      mail.append(para) if mail
    }
  end

  def sort_by_subject_and_no
    sort_by { |m| [m.subject, m.no] }
  end

  # sorts by a list of attributes of mail, given as symbols
  def sort_by_attributs(*attrs)
    # you can sort an Enumerable by an array of
    # values, they would be compared
    # from ary[0] to ary[n], say:
    # ['b',1] > ['a',10] > ['a',9]
    sort_by { |elem|
      attrs.map { |attr| elem.send(attr) }
    }
  end

end

mailbox = Mailbox.new
mailbox.read(ARGF)

# print only subjects sorted by subject and number
mailbox.sort_by_subject_and_no.each do |m|
  puts(m.subject)
end

# print complete mails sorted by date, then subject
mailbox.sort_by_attributs(:date, :subject).each do |m|
  puts(m)
end


# @@PLEAC@@_11.7
# Returns a hash of closures that share one counter starting at count.
def mkcounter(count)
  start = count
  bundle = {
    "NEXT" => proc { count += 1 },
    "PREV" => proc { count -= 1 },
    "RESET" => proc { count = start }
  }
  bundle["LAST"] = bundle["PREV"]
  return bundle
end

c1 = mkcounter(20)
c2 = mkcounter(77)

puts "next c1: #{c1["NEXT"].call}" # 21
puts "next c2: #{c2["NEXT"].call}" # 78
puts "next c1: #{c1["NEXT"].call}" # 22
puts "last c1: #{c1["PREV"].call}" # 21
puts "last c1: #{c1["LAST"].call}" # 20
puts "old c2: #{c2["RESET"].call}" # 77


# @@PLEAC@@_11.15
class Binary_tree
  def initialize(val)
    @value = val
    @left = nil
    @right = nil
  end

  # insert given value into proper point of the tree.
  # Duplicate values are reported and not stored.
  def insert(val)
    if val < @value then
      if @left then
        @left.insert(val)
      else
        @left = Binary_tree.new(val)
      end
    elsif val > @value then
      if @right then
        @right.insert(val)
      else
        @right = Binary_tree.new(val)
      end
    else
      puts "double"
      # nothing is stored, no double values
    end
  end

  # recurse on left child,
  # then show current value,
  # then recurse on right child.
  def in_order
    @left.in_order if @left
    print @value, " "
    @right.in_order if @right
  end

  # show current value,
  # then recurse on left child,
  # then recurse on right child.
  def pre_order
    print @value, " "
    @left.pre_order if @left
    @right.pre_order if @right
  end

  # recurse on left child,
  # then recurse on right child,
  # then show current value.
  def post_order
    @left.post_order if @left
    @right.post_order if @right
    print @value, " "
  end

  # find out whether provided value is in the tree.
  # if so, return the node at which the value was found.
  # cut down search time by only looking in the correct
  # branch, based on current value.
  def search(val)
    if val == @value then
      return self
    elsif val < @value then
      return @left.search(val) if @left
      return nil
    else
      return @right.search(val) if @right
      return nil
    end
  end
end

# first generate 20 random inserts
test = Binary_tree.new(0)
20.times do
  test.insert(rand(1000))
end

# now dump out the tree all three ways
print "Pre order: "; test.pre_order; puts ""
print "In order: "; test.in_order; puts ""
print "Post order: "; test.post_order; puts ""

print "search?"
while gets
  print test.search($_.to_i)
  print "\nsearch?"
end


# @@PLEAC@@_12.0
# class and module names need to have the first letter capitalized
module Alpha
  NAME = 'first'
end
module Omega
  NAME = 'last'
end
puts "Alpha is #{Alpha::NAME}, Omega is #{Omega::NAME}"

# ruby doesn't differentiate between compile-time and run-time
require 'getoptlong.rb'
require 'getoptlong' # assumes the .rb
require 'cards/poker.rb'
require 'cards/poker' # assumes the .rb
load 'cards/poker.rb' # require only loads the file once; load needs the extension

module Cards
  module Poker
    @card_deck = Array.new # or @card_deck = []
    def shuffle
    end
  end
end


# @@PLEAC@@_12.1
# a module exports all of its functions
module Your_Module
  def self.function
    # this would be called as Your_Module.function
  end

  def Your_Module.another
    # this is the same as above, but more specific
  end
end

# @@PLEAC@@_12.2
begin
  require 'nonexistent'
rescue LoadError
  puts "Couldn't load #{$!}" # $! contains the last error string
end

# @@PLEAC@@_12.4
# module variables are private unless access functions are defined
module Alpha
  @aa = 10
  @bb = 11

  def self.put_aa
    puts @aa
  end

  def self.bb=(val)
    @bb = val
  end
end

Alpha.bb = 12
# Alpha.aa = 10 # error, no aa= method


# @@PLEAC@@_12.5
# caller provides a backtrace of the call stack
module MyModule
  def find_caller
    caller
  end

  def find_caller2(i)
    caller(i) # an argument limits the size of the stack returned
  end
end


# @@PLEAC@@_12.6
BEGIN {
  $logfile = '/tmp/mylog' unless defined? $logfile
  $LF = File.open($logfile, 'a')
}

module Logger
  def self.logmsg(msg)
    $LF.puts msg
  end

  logmsg('startup')
end

END {
  Logger::logmsg('shutdown')
  $LF.close
}


# @@PLEAC@@_12.7
#-----------------------------
# results may be different on your system
# % ruby -e '$LOAD_PATH.each_index { |i| printf("%d %s\n", i, $LOAD_PATH[i]) }'
#0 /usr/local/lib/site_ruby/1.6
#1 /usr/local/lib/site_ruby/1.6/i386-linux
#2 /usr/local/lib/site_ruby/
#3 /usr/lib/ruby/1.6
#4 /usr/lib/ruby/1.6/i386-linux
#5 .
#-----------------------------
# syntax for sh, bash, ksh, or zsh
#$ export RUBYLIB=$HOME/rubylib

# syntax for csh or tcsh
# % setenv RUBYLIB ~/rubylib
#-----------------------------
$LOAD_PATH.unshift "/projects/spectre/lib"


# @@PLEAC@@_12.8
# equivalents in ruby are mkmf, SWIG, or Ruby/DL depending on usage


# @@PLEAC@@_12.9
# no equivalent in ruby


# @@PLEAC@@_12.10
# no equivalent in ruby


# @@PLEAC@@_12.11
module FineTime
  def self.time
    # to be defined later
  end
end


module FineTime
  def self.time
    "its a fine time"
  end
end

puts FineTime.time #=> "its a fine time"


# @@PLEAC@@_12.12
# (two alternative definitions; the second one replaces the first)
def even_only(n)
  raise "#{n} is not even" if (n & 1) != 0 # one way to test
  # ...
end
def even_only(n)
  $stderr.puts "#{n} is not even" if (n & 1) != 0
  # ...
end


# @@PLEAC@@_12.17
# The library archive for ruby is called Ruby Application Archive,
# or shorter RAA, and can be found at http://raa.ruby-lang.org.
# A typical library is installed like this:
# % gunzip some-module-4.54.tar.gz
# % tar xf some-module-4.54.tar
# % cd some-module-4.54
# % ruby install.rb config
# % ruby install.rb setup
# get superuser privileges here if needed for next step
# % ruby install.rb install

# Some modules use a different process,
# you should find details in the documentation
# Here is an example of such a different process
# % ruby extconf.rb
# % make
# % make install

# If you want the module installed in your own directory:
# For ruby version specific libraries
# % ruby install.rb config --site-ruby=~/lib
# For version independent libraries
# % ruby install.rb config --site-ruby-common=~/lib

# Information about possible options for config
# % ruby install.rb --help

# If you have your own complete distribution
# % ruby install.rb --prefix=path=~/ruby-private


# @@PLEAC@@_13.0
# Classes and objects in Ruby are rather straightforward
class Person
  # Class variables (also called static attributes) are prefixed by @@
  @@person_counter=0

  # object constructor
  def initialize(age, name, alive = true) # Default arg like in C++
    @age, @name, @alive = age, name, alive # Object attributes are prefixed by '@'
    @@person_counter += 1
    # There is no '++' operator in Ruby. The '++'/'--' operators are in fact
    # hidden assignments which affect variables, not objects. You cannot accomplish
    # assignment via method. Since everything in Ruby is object, '++' and '--'
    # contradict Ruby OO ideology. Instead '-=' and '+=' are used.
  end

  attr_accessor :name, :age # This creates setter and getter methods for @name
                            # and @age. See 13.3 for details.

  # methods modifying the receiver object usually have the '!' suffix
  def die!
    @alive = false
    puts "#{@name} has died at the age of #{@age}."
    @alive
  end

  def kill(anotherPerson)
    print @name, ' is killing ', anotherPerson.name, ".\n"
    anotherPerson.die!
  end

  # methods used as queries
  # usually have the '?' suffix
  def alive?
    @alive && true
  end

  def year_of_birth
    Time.now.year - @age
  end

  # Class method (also called static method)
  def Person.number_of_people
    @@person_counter
  end
end

# Using the class:
# Create objects of class Person
lecter = Person.new(47, 'Hannibal')
starling = Person.new(29, 'Clarice', true)
pazzi = Person.new(40, 'Rinaldo', true)

# Calling a class method
print "There are ", Person.number_of_people, " Person objects\n"

print pazzi.name, ' is ', (pazzi.alive?) ? 'alive' : 'dead', ".\n"
lecter.kill(pazzi)
print pazzi.name, ' is ', (pazzi.alive?) ? 'alive' : 'dead', ".\n"

print starling.name , ' was born in ', starling.year_of_birth, "\n"


# @@PLEAC@@_13.1
# If you don't need any initialisation in the constructor,
# you don't need to write a constructor.
class MyClass
end

class MyClass
  def initialize
    @start = Time.new
    @age = 0
  end
end

class MyClass
  # inithash maps attribute names to the initial values to store.
  def initialize(inithash)
    @start = Time.new
    @age = 0
    inithash.each do |key, value|
      instance_variable_set("@#{key}", value)
    end
  end
end

# @@PLEAC@@_13.2
# Objects are destroyed by the garbage collector.
# The time of destroying is not predictable.
# The ruby garbage collector can handle circular references,
# so there is no need to write destructor for that.

# There is no direct support for destructor.
# You can call a custom function, or more specific a proc object, when the
# garbage collector is about to destruct the object, but it is unpredictable
# when this occurs.
# Also if such a finalizer object has a reference to the original object,
# this may prevent the original object to get garbage collected.
# Because of this problem the finalize method below is
# a class method and not a instance method.
# So if you need to free resources for an object, like
# closing a socket or kill a spawned subprocess,
# you should do it explicitly.

class MyClass
  def initialize
    ObjectSpace.define_finalizer(self,
                                 self.class.method(:finalize).to_proc)
  end
  def MyClass.finalize(id)
    puts "Object #{id} dying at #{Time.new}"
  end
end

# test code
3.times {
  MyClass.new
}
ObjectSpace.garbage_collect


# @@PLEAC@@_13.3
# You can write getter and setter methods in a natural way:
class Person
  def name
    @name
  end
  def name=(name)
    @name = name
  end
end

# But there is a better and shorter way
class Person
  attr_reader :age
  attr_writer :name
  # attr_reader and attr_writer are actually methods in class Module
  # which set getter and setter methods for you.
end

# There is also attr_accessor to create both setters and getters
class Person
  attr_accessor :age, :name
end


# @@PLEAC@@_13.4
class Person
  # Class variables (also called static attributes) are prefixed by @@
  @@person_counter = 0

  def Person.population
    @@person_counter
  end
  def initialize
    @@person_counter += 1
    ObjectSpace.define_finalizer(self,
                                 self.class.method(:finalize).to_proc)
  end
  def Person.finalize(id)
    @@person_counter -= 1
  end
end
people = []
10.times {
  people.push(Person.new)
}
printf("There are %d people alive", Person.population)


# The class has to be defined before the statements that use it are
# executed, so the definition comes first.
class FixedArray
  # one bound shared by the class and all of its instances
  @@max_bounds = 7

  def max_bounds
    @@max_bounds
  end
  # instance method, which sets the class variable
  def max_bounds=(value)
    @@max_bounds = value
  end
  # class method. This can only be called on a class,
  # but not on the instances
  def FixedArray.class_max_bounds=(value)
    @@max_bounds = value
  end
end

FixedArray.class_max_bounds = 100
alpha = FixedArray.new
puts "Bound on alpha is #{alpha.max_bounds}"

beta = FixedArray.new
beta.max_bounds = 50 # calls the instance method
beta.class.class_max_bounds = 50 # alternative, calls the class method
puts "Bound on alpha is #{alpha.max_bounds}"


# @@PLEAC@@_13.5
PersonStruct = Struct.new("Person", :name, :age, :peers)
# creates a class "Struct::Person", which is accessible with the
# constant "PersonStruct"
p = PersonStruct.new
p = Struct::Person.new # alternative using the classname
p.name = "Jason Smythe"
p.age = 13
p.peers = ["Wilbur", "Ralph", "Fred"]
p[:peers] = ["Wilbur", "Ralph", "Fred"] # alternative access using symbol
p["peers"] = ["Wilbur", "Ralph", "Fred"] # alternative access using name of field
p[2] = ["Wilbur", "Ralph", "Fred"] # alternative access using index of field
puts "At age #{p.age}, #{p.name}'s first friend is #{p.peers[0]}"

# The fields of a struct have no special type, like other ruby variables
# you can put any objects in. Therefore the discussions how to specify
# the types of the fields do not apply to ruby.

FamilyStruct = Struct.new("Family", :head, :address, :members)
folks = FamilyStruct.new
folks.head = PersonStruct.new
dad = folks.head
dad.name = "John"
dad.age = 34

# supply of own accessor method for the struct for error checking
class PersonStruct
  # Raises ArgumentError unless value is an Integer not greater than 150.
  def age=(value)
    if !value.kind_of?(Integer)
      raise(ArgumentError, "Age #{value} isn't an Integer")
    elsif value > 150
      raise(ArgumentError, "Age #{value} is unreasonable")
    end
    # Struct members are not instance variables, so the value is stored
    # through the member accessor; otherwise the age reader would not see it.
    self[:age] = value
  end
end


# @@PLEAC@@_13.6
# The ruby Object class defines a dup and a clone method.
# The dup method is recommended for prototype object creation.
# The default implementation makes a shallow copy,
# but each class can override it, for example to make a deep copy.

# If you want to call 'new' directly on the instances,
# you can create a instance method "new", which returns a new duplicate.
# This method is distinct from the class method new.
#
class A
  # instance-level "new": returns a shallow duplicate of the receiver
  def new
    dup
  end
end

ob1 = A.new
# later on
ob2 = ob1.new


# @@PLEAC@@_13.7
# (obj stands for any object that provides the named methods)
methname = 'flicker'
obj.send(methname, 10) # calls obj.flicker(10)

# call three methods on the object, by name
['start', 'run', 'stop'].each do |method_string|
  obj.send(method_string)
end

# Another way is to create a Method object
method_obj = obj.method('flicker')
# And then call it
method_obj.call(10)


# @@PLEAC@@_13.8
# All classes in Ruby inherit from class Object
# and thus all objects share methods defined in this class

# the class of the object (Object#type no longer exists)
puts any_object.class

# Ruby classes are actually objects of class Class and they
# respond to methods defined in Object class as well

# the superclass of this class
puts any_object.class.superclass

# ask an object whether it is an instance of particular class
n = 4.7
puts n.instance_of?(Float) # true
puts n.instance_of?(Numeric) # false

# ask an object whether it is an instance of class, one of the
# superclasses of the object, or modules included in it
puts n.kind_of?(Float) # true (the class)
puts n.kind_of?(Numeric) # true (an ancestor class)
puts n.kind_of?(Comparable) # true (a mixin module)
puts n.kind_of?(String) # false

# ask an object whether it can respond to a particular method
puts n.respond_to?('+') # true
puts n.respond_to?('length') # false

# all methods an object can respond to
'just a string'.methods.each { |m| puts m }


# @@PLEAC@@_13.9
# Actually any class in Ruby is inheritable
class Person
  attr_accessor :age, :name
  def initialize
    # a bare "@name" is an expression without effect; assign explicitly
    @name = nil
    @age = nil
  end
end
#-----------------------------
dude = Person.new
dude.name = 'Jason'
dude.age = 23
printf "%s is age %d.\n", dude.name, dude.age
#-----------------------------
# Inheriting from Person
class Employee < Person
  attr_accessor :salary
end
#-----------------------------
empl = Employee.new
empl.name = 'Jason'
empl.age = 23
empl.salary = 200
printf "%s is age %d, the salary is %d.\n", empl.name, empl.age, empl.salary
#-----------------------------
# Any built-in class can be inherited the same way
class WeirdString < String
  def initialize(obj)
    super obj
  end
  def +(anotherObj) # + method in this class is overridden
    # to return the sum of string lengths
    self.length + anotherObj.length # 'self' can be omitted
  end
end
#-----------------------------
a = WeirdString.new('hello')
b = WeirdString.new('bye')

puts a + b # the overridden +
#=> 8
puts a.length # method from the superclass, String
#=> 5


# @@PLEAC@@_13.11
# In ruby you can override the method_missing method
# to have a solution similar to perls AUTOLOAD.
class Person

  def initialize
    @ok_fields = %w(name age peers parent)
  end

  def valid_attribute?(name)
    @ok_fields.include?(name)
  end

  # Emulates a getter/setter pair for every name listed in @ok_fields.
  # Calls whose name starts with a capital letter are ignored (return nil);
  # any other unknown name is passed on to super.
  def method_missing(namesymbol, *params)
    name = namesymbol.to_s
    return if name =~ /^[A-Z]/
    setter = name.end_with?('=') # we have a setter
    name = name.chomp('=') if setter
    if valid_attribute?(name)
      if setter
        instance_variable_set("@#{name}", *params)
      else
        instance_variable_get("@#{name}", *params)
      end
    else
      # if no ancestor is responsible,
      # the Object class will throw a NoMethodError exception
      super(namesymbol, *params)
    end
  end

  # Keeps respond_to? in agreement with method_missing.
  def respond_to_missing?(namesymbol, include_private = false)
    valid_attribute?(namesymbol.to_s.chomp('=')) || super
  end

  # instance-level "new": creates a Person whose parent is the receiver
  def new
    kid = Person.new
    kid.parent = self
    kid
  end

end

dad = Person.new
dad.name = "Jason"
dad.age = 23
kid = dad.new
kid.name = "Rachel"
kid.age = 2
puts "Kid's parent is #{kid.parent.name}"
puts dad
puts kid

class Employee < Person
  def initialize
    super
    @ok_fields.push("salary", "boss")
  end
  def ok_fields
    @ok_fields
  end
end


# @@PLEAC@@_13.13
# The ruby garbage collector pretends to cope with circular structures.
# You can test it with this code:
class RingNode
  attr_accessor :next
  attr_accessor :prev
  attr_reader :name

  def initialize(aName)
    @name = aName
    # the finalizer is a class method so that it holds no reference to self
    ObjectSpace.define_finalizer(self,
                                 self.class.method(:finalize).to_proc)
  end

  def RingNode.finalize(id)
    puts "Node #{id} dying"
  end

  # Prints the name of every RingNode that is still alive.
  def RingNode.show_all_objects
    # Restricting the walk to RingNode avoids calling methods on every
    # live object of the process.
    ObjectSpace.each_object(RingNode) { |node|
      puts node.name
    }
  end
end

# Builds a ring of three nodes and then drops every local reference to it.
def create_test
  a = RingNode.new("Node A")
  b = RingNode.new("Node B")
  c = RingNode.new("Node C")
  a.next = b
  b.next = c
  c.next = a
  a.prev = c
  c.prev = b
  b.prev = a

  a = nil
  b = nil
  c = nil
end

create_test
RingNode.show_all_objects
ObjectSpace.garbage_collect
puts "After garbage collection"
RingNode.show_all_objects


# @@PLEAC@@_13.14
# Redefines string comparison to ignore case.
class String
  def <=>(other)
    self.casecmp other
  end
end

# There is no way to directly overload the '""' (stringify)
# operator in Ruby. However, by convention, classes which
# can reasonably be converted to a String will define a
# 'to_s' method as in the TimeNumber class defined below.
# The 'puts' method will automatically call an object's
# 'to_s' method as is demonstrated below.
# Furthermore, if a class defines a to_str method, an object of that
# class can be used most any place where the interpreter is looking
# for a String value.

#---------------------------------------
# NOTE: Ruby has a builtin Time class which would usually be used
# to manipulate time objects, the following is supplied for
# educational purposes to demonstrate operator overloading.
#
# A time span in hours, minutes and seconds that supports addition.
class TimeNumber
  attr_accessor :hours,:minutes,:seconds
  def initialize( hours, minutes, seconds)
    @hours = hours
    @minutes = minutes
    @seconds = seconds
  end

  def to_s
    return sprintf( "%d:%02d:%02d", @hours, @minutes, @seconds)
  end

  def to_str
    to_s
  end

  # Returns a new TimeNumber holding the sum, with seconds and minutes
  # carried over. divmod carries any overflow, not only a single one.
  def +( other)
    carry, seconds = (@seconds + other.seconds).divmod(60)
    carry, minutes = (@minutes + other.minutes + carry).divmod(60)
    hours = @hours + other.hours + carry
    return TimeNumber.new(hours, minutes, seconds)
  end

  def -(other)
    raise NotImplementedError
  end

  def *(other)
    raise NotImplementedError
  end

  def /( other)
    raise NotImplementedError
  end
end

t1 = TimeNumber.new(0, 58, 59)
sec = TimeNumber.new(0, 0, 1)
min = TimeNumber.new(0, 1, 0)
puts t1 + sec + min + min

#-----------------------------
# StrNum class example: Ruby's builtin String class already has the
# capabilities outlined in StrNum Perl example, however the '*' operator
# on Ruby's String class acts differently: It creates a string which
# is the original string repeated N times.
#
# Using Ruby's String class as is in this example:
x = "Red"; y = "Black"
z = x+y
r = z*3 # r is "RedBlackRedBlackRedBlack"
puts "values are #{x}, #{y}, #{z}, and #{r}"
print "#{x} is ", x < y ? "LT" : "GE", " #{y}\n"
# prints:
# values are Red, Black, RedBlack, and RedBlackRedBlackRedBlack
# Red is GE Black

#-----------------------------
# A number together with the count of decimal places used when printing it.
class FixNum
  REGEX = /(\.\d*)/
  DEFAULT_PLACES = 0
  attr_accessor :value, :places

  # When places is not given it is taken from the digits that follow the
  # decimal point in value.to_s; a value without a point gets DEFAULT_PLACES.
  def initialize(value, places = nil)
    @value = value
    if places
      @places = places
    else
      m = REGEX.match(value.to_s)
      if m
        @places = m[0].length - 1
      else
        @places = DEFAULT_PLACES
      end
    end
  end

  def +(other)
    FixNum.new(@value + other.value, max(@places, other.places))
  end

  def *(other)
    FixNum.new(@value * other.value, max(@places, other.places))
  end

  def /(other)
    puts "Divide: #{@value.to_f/other.value.to_f}"
    result = FixNum.new(@value.to_f/other.value.to_f)
    result.places = max(result.places,other.places)
    result
  end

  def to_s
    sprintf("STR%s: %.*f", self.class.to_s , @places, @value) #.
  end

  def to_str
    to_s
  end

  def to_i #convert to int
    @value.to_i
  end

  def to_f #convert to float
    @value.to_f
  end

  private
  def max(a,b)
    a > b ? a : b
  end
end

def demo()
  x = FixNum.new(40)
  y = FixNum.new(12, 0)

  puts "sum of #{x} and #{y} is #{x+y}"
  puts "product of #{x} and #{y} is #{x*y}"

  z = x/y
  puts "#{z} has #{z.places} places"
  # z.places is always set by the constructor, so no default is needed here

  puts "div of #{x} by #{y} is #{z}"
  puts "square of that is #{z*z}"
end

if __FILE__ == $0
  demo()
end


# @@PLEAC@@_14.1
# There are dbm, sdbm, gdbm modules
# and the bdb module for accessing the berkeley db
# sdbm seem to be available on the most systems,
# so we use it here
#
require "sdbm"
SDBM.open("filename", 0666) { |dbobj|
  # raises exception if open error

  # the returned sdbm-dbobj has most of the methods of a hash
  v = dbobj["key"]
  dbobj["key"] = "newvalue"
  if dbobj.has_key?("key")
    # ...
  end
  dbobj.delete("key2")
}
# database is open only inside the block.

# It is also possible to use a open ..
# close pair:
dbobj = SDBM.open("filename", 0666)
#.. do something with dbobj
dbobj.close

#!/usr/bin/ruby -w
# userstats - generate statistics on who is logged in
# call with usernames as argument to display the totals
# for the given usernames, call with "ALL" to display all users

require "sdbm"
filename = '/tmp/userstats.db'
SDBM.open(filename, 0666) { |dbobj|
  if ARGV.length > 0
    if ARGV[0] == "ALL"
      # ARGV is constant, so we need the variable userlist
      userlist = dbobj.keys().sort()
    else
      userlist = ARGV
    end
    userlist.each { |user|
      print "#{user}\t#{dbobj[user]}\n"
    }
  else
    who = `who`
    who.split("\n").each { |line|
      md = /^(\S+)/.match(line)
      raise "Bad line from who: #{line}" unless md
      # sdbm stores only strings, so "+=" doesn't work,
      # we need to convert them explicitly to integer and back.
      if dbobj.has_key?(md[0])
        dbobj[md[0]] = (dbobj[md[0]].to_i + 1).to_s
      else
        dbobj[md[0]] = "1"
      end
    }
  end
}


# @@PLEAC@@_14.2
# using open and clear
dbobj = SDBM.open("filename", 0666)
dbobj.clear()
dbobj.close()
# deleting file and recreating it
# the filenames depend on the flavor of dbm you use,
# for example sdbm has two files named filename.pag and filename.dir,
# so you need to delete both files
begin
  File.delete("filename.pag", "filename.dir")
  # raises Exception if not exist
  dbobj = SDBM.open("filename", 0666)
rescue
  # add error handling here
end


# @@PLEAC@@_14.3
# sdbm2gdbm: converts sdbm database to a gdbm database
require "sdbm"
require "gdbm"

unless ARGV.length == 2
  fail "usage: sdbm2gdbm infile outfile"
end
infile = ARGV[0]
outfile = ARGV[1]

sdb = SDBM.open(infile)
gdb = GDBM.open(outfile, 0666)
sdb.each { |key, val|
  gdb[key] = val
}
gdb.close
sdb.close


# @@PLEAC@@_14.4
#!/usr/bin/ruby -w
# dbmmerge: merges two dbm databases
require "sdbm"

unless ARGV.length == 3
  fail "usage: dbmmerge indb1 indb2 outdb"
end
infile1 = ARGV[0]
infile2 = ARGV[1] # second input database (was read from ARGV[0] by mistake)
outfile = ARGV[2]

in1 = SDBM.open(infile1, nil)
in2 = SDBM.open(infile2, nil)
outdb = SDBM.open(outfile, 0666)

[in1, in2].each { |indb|
  indb.each { |key, val|
    if outdb.has_key?(key)
      # decide which value to set.
      # set outdb[key] if necessary
    else
      outdb[key] = val
    end
  }
}
in1.close
in2.close
outdb.close


# @@PLEAC@@_14.7
# we write a tie method that extends the Array class.
# It reads the file into the memory, executes the code block
# in which you can manipulate the array as needed, and writes
# the array back to the file after the end of the block execution
class Array
  # filename: path of the text file; flags: open flags handed to File.open.
  def tie(filename, flags)
    File.open(filename, flags) { |f|
      f.each_line { |line|
        self.push(line.chomp)
      }
      yield
      f.rewind
      each { |line|
        if line
          f.puts(line)
        else
          f.puts ""
        end
      }
      # drop what is left of the old content when the array became shorter
      f.truncate(f.pos)
    }
  end
end

array = Array.new
array.tie("/tmp/textfile.txt", File::RDWR|File::CREAT) {
  array[4] = "a new line 4"
}

# The tied array can be manipulated like a normal array,
# so there is no need for a special API, and the recno_demo program
# to demonstrate its API is useless


# tied array demo: show how to use array with a tied file
filename = "db_file.txt"
lines = Array.new
File.unlink(filename) if File.exist?(filename)
lines.tie(filename, File::RDWR | File::CREAT) {
  # first create a textfile to play with
  lines[0] = "zero"
  lines[1] = "one"
  lines[2] = "two"
  lines[3] = "three"
  lines[4] = "four"

  # print the records in order.
  # Opposed to perl, the tied array behaves exactly as a normal array
  puts "\nOriginal"
  for i in 0..(lines.length-1)
    puts "#{i}: #{lines[i]}"
  end

  #use push and pop
  a = lines.pop
  lines.push("last")
  puts("The last line was [#{a}]")

  #use shift and unshift
  a = lines.shift
  lines.unshift("first")
  puts("The first line was [#{a}]")

  # add record after record 2
  i = 2
  lines.insert(i + 1, "Newbie")

  # add record before record one
  i = 1
  lines.insert(i, "New One")

  # delete record 3
  lines.delete_at(3)

  #now print the records in reverse order
  puts "\nReverse"
  (lines.length - 1).downto(0){ |i|
    puts "#{i}: #{lines[i]}"
  }

}


# @@PLEAC@@_14.8
# example to store complex data in a database
# uses marshall from the standard library
require "sdbm"
db = SDBM.open("pleac14-8-database", 0666)

# convert the Objects into strings and back by using the Marshal module.
# Most normal objects can be converted out of the box,
# but not special things like procedure objects,
# IO instance variables, singleton objects

db["Tom Christiansen"] = Marshal.dump(["book author", "tchrist@perl.com"])
db["Tom Boutell"] = Marshal.dump(["shareware author",
                                  "boutell@boutell.com"])

name1 = "Tom Christiansen"
name2 = "Tom Boutell"

# NOTE: Marshal.load must only be given data this program wrote itself
tom1 = Marshal.load(db[name1])
tom2 = Marshal.load(db[name2])

puts "Two Toming: #{tom1} #{tom2}"

if tom1[0] == tom2[0] && tom1[1] == tom2[1]
  puts "You're having runtime fun with one Tom made two."
else
  puts "No two Toms are ever alike"
end

# To change parts of an entry, get the whole entry, change the parts,
# and save the whole entry back
entry = Marshal.load(db["Tom Boutell"])
entry[0] = "Poet Programmer"
db["Tom Boutell"] = Marshal.dump(entry)
db.close


# @@PLEAC@@_14.9
# example to make data persistent
# uses Marshal from the standard lib
# Stores the data in a simple file,
# see 14.8 on how to store it in a dbm file

# The BEGIN block is executed before the rest of the script
# we use global variables here because local variables
# will go out of scope and are not accessible from the main script

BEGIN {
  $persistent_store = "persitence.dat"
  begin
    File.open($persistent_store) do |f|
      $stringvariable1 = Marshal.load(f)
      $arrayvariable2 = Marshal.load(f)
    end
  rescue
    puts "Can not open #{$persistent_store}"
    # Initialisation if this script runs the first time
    $stringvariable1 = ""
    $arrayvariable2 = []
  end
}

END {
  File.open($persistent_store, "w+") do |f|
    Marshal.dump($stringvariable1, f)
    Marshal.dump($arrayvariable2, f)
  end
}

# simple test program
puts $stringvariable1
puts $arrayvariable2
$stringvariable1 = "Hello World"
$arrayvariable2.push(5)
puts $stringvariable1
puts $arrayvariable2


# @@PLEAC@@_14.10
#!/usr/bin/ruby -w
# Ruby has a dbi module with an architecture similar
# to the Perl dbi module: the dbi module provides an unified
# interface and uses specialized drivers for each dbms vendor
#
begin
  DBI.connect("DBI:driver:driverspecific", "username", "auth") {
    |dbh|

    dbh.do(SQL1)

    dbh.prepare(SQL2){ |sth|
      sth.execute
      sth.fetch {|row|
        # ...
      }
    } # end of block finishes the statement handle
  } # end of block closes the database connection
rescue DBI::DatabaseError => e
  puts "dbi error occurred"
  puts "Error code: #{e.err}"
  puts "Error message: #{e.errstr}"
end

#!/usr/bin/ruby -w
# dbusers - example for mysql which creates a table,
# fills it with values, retrieves the values back,
# and finally destroys the table.

require "dbi"

# replacement for the User::pwent module
# Returns an array of [uid, login] pairs read from /etc/passwd.
def getpwent
  result = []
  File.open("/etc/passwd") {|file|
    file.each_line {|line|
      next if line.match(/^#/)
      cols = line.split(":")
      result.push([cols[2], cols[0]])
    }
  }
  result
end

begin
  DBI.connect("DBI:Mysql:pleacdatabase", "pleac", "pleacpassword") {
    |conn|

    conn.do("CREATE TABLE users (uid INT, login CHAR(8))")

    users = getpwent

    conn.prepare("INSERT INTO users VALUES (?,?)") {|sth|
      users.each {|entry|
        sth.execute(entry[0], entry[1])
      }
    }

    conn.execute("SELECT uid, login FROM users WHERE uid < 50") {|sth|
      sth.fetch {|row|
        puts row.collect {|col|
          if col.nil?
            "(null)"
          else
            col
          end
        }.join(", ")
      }
    }

    conn.do("DROP TABLE users")
  }
rescue DBI::DatabaseError => e
  puts "dbi error occurred"
  puts "Error code: #{e.err}"
  puts "Error message: #{e.errstr}"
end


# @@PLEAC@@_15.1
# This test program demonstrates parsing program arguments.
# It uses the optparse library, which is included with ruby 1.8
# It handles classic unix style and gnu style options
require 'optparse'

@debugmode = false
@verbose = false

ARGV.options do |opts|
  opts.banner = "Usage: ruby #{$0} [OPTIONS] INPUTFILES"

  opts.on("-h", "--help", "show this message") {
    puts opts
    exit
  }
  # The OptionParser#on method is called with a specification of short
  # options, of long options, a data type specification and user help
  # messages for this option.
  # The method analyses the given parameter and decides what it is,
  # so you can leave out the long option if you don't need it
  #
  # An instance variable cannot be a block parameter, so each block
  # receives the value and assigns it.
  opts.on("-v", "--[no-]verbose=[FLAG]", TrueClass, "run verbosly") { |flag|
    @verbose = flag # true or false
  }
  opts.on("-D", "--DEBUG", TrueClass, "turns on debug mode" ){ |flag|
    @debugmode = flag # true
  }
  opts.on("-c", "--count=NUMBER", Integer, "how many times we do it" ){ |number|
    @count = number # the given integer
  }
  opts.on("-o", "--output=FILE", String, "file to write output to"){ |file|
    @outputfile = file # the given string
  }
  opts.parse!
end

# example to use the options in the main program
puts "Verbose is on" if @verbose
puts "Debugmode is on" if @debugmode
puts "Outfile is #{@outputfile}" if defined? @outputfile
puts "Count is #{@count}" if defined? @count
ARGV.each { |param|
  puts "Got parameter #{param}"
}


# @@PLEAC@@_15.4
buf = "\0" * 8
$stdout.ioctl(0x5413, buf)
ws_row, ws_col, ws_xpixel, ws_ypixel = buf.unpack("S4")

raise "You must have at least 20 characters" unless ws_col >= 20
max = 0
values = (1..5).collect { rand(20) } # generate an array[5] of rand values
for i in values
  max = i if max < i
end
ratio = Float(ws_col-12)/max # chars per unit
for i in values
  printf "%8.1f %s\n", i, "*" * (ratio*i)
end

# gives, for example:
# 15.0 *******************************
# 10.0 *********************
# 5.0 **********
# 14.0 *****************************
# 18.0 **************************************


# @@PLEAC@@_16.1
output = `program args` # collect output into one multiline string
output = `program args`.split("\n") # collect output into array, one line per element

readme = IO.popen("ls")
output = ""
while readme.gets do
  output += $_
end
readme.close

`fsck -y /dev/rsd1a` # BAD AND SCARY in Perl because it's managed by the shell
                     # I donna in Ruby ...

# so the "clean and secure" version
readme, writeme = IO.pipe
pid = fork {
  # child
  # reopen redirects the real standard output, which the exec'd program
  # inherits; assigning $stdout would only affect this Ruby process
  $stdout.reopen(writeme)
  readme.close
  exec('find', '..')
}
# parent
writeme.close
while readme.gets do
  # do something with $_
end
# wait only after the pipe is drained, otherwise a child producing more
# than the pipe can buffer would block forever
Process.waitpid(pid, 0)


# @@PLEAC@@_16.2
status = system("xemacs #{myfile}")

status = system("xemacs", myfile)

system("cmd1 args | cmd2 | cmd3 >outfile")
system("cmd args <infile >outfile 2>errfile")

# stop if the command fails
raise "$program exited funny: #{$?}" unless system("cmd", "args1", "args2")

# get the value of the signal sent to the child
# even if it is a SIGINT or SIGQUIT
system(arglist)
raise "program killed by signal #{$?}" if $?.signaled?

pid = fork {
  trap("SIGINT", "IGNORE")
  exec("sleep", "10")
}
trap("SIGINT") {
  puts "Tsk tsk, no process interruptus"
}
Process.waitpid(pid, 0)

# Ruby doesn't permit to lie to the program called by a 'system'.
# (ie specify what return argv[0] in C, $0 in Perl/Ruby ...)
# A (dirty) way is to create a link (under Unix), run this link and
# erase it. Does somebody have a better idea?


# @@PLEAC@@_16.3
exec("archive *.data")

exec("archive", "accounting.data")

exec("archive accounting.data")


# @@PLEAC@@_16.4
# read the output of a program
IO.popen("ls") {|readme|
  while readme.gets do
    # ...
  end
}
# or
readme = IO.popen("ls")
while readme.gets do
  # ...
end
readme.close

# "write" in a program
IO.popen("cmd args","w") {|pipe|
  pipe.puts("data")
  pipe.puts("foo")
}

# close wait for the end of the process
read = IO.popen("sleep 10000") # child goes to sleep
read.close # and the parent goes to lala land

writeme = IO.popen("cmd args", "w")
writeme.puts "hello" # program will get hello\n on STDIN
writeme.close # program will get EOF on STDIN

# send in a pager (eg less) all output
$stdout = IO.popen("/usr/bin/less","w")
print "huge string\n" * 10000


# @@PLEAC@@_16.5
#-----------------------------
# open("|-") forks: it returns nil in the child and a pipe to the child's
# standard input in the parent.
def head(lines = 20)
  pid = open("|-","w")
  if pid == nil
    return
  else
    while gets() do
      pid.print
      lines -= 1
      break if lines == 0
    end
  end
  exit
end

head(100)
while gets() do
  print
end
#-----------------------------
# sample output:
# 1: > Welcome to Linux, version 2.0.33 on a i686
#
# 2: >
#
# 3: > "The software required `Windows 95 or better',
#
# 4: > so I installed Linux."
#-----------------------------
# sample output:
# > 1: Welcome to Linux, Kernel version 2.0.33 on a i686
#
# > 2:
#
# > 3: "The software required `Windows 95 or better',
#
# > 4: so I installed Linux."
#-----------------------------
#!/usr/bin/ruby
# qnumcat - demo additive output filters

def number()
  pid = open("|-","w")
  if pid == nil
    return
  else
    while gets() do pid.printf("%d: %s", $., $_); end
  end
  exit
end

def quote()
  pid = open("|-","w")
  if pid == nil
    return
  else
    while gets() do pid.print "> #{$_}" end
  end
  exit
end

number()
quote()

while gets() do
  print
end
$stdout.close
exit


# @@PLEAC@@_16.6
ARGV.map! { |arg|
  arg =~ /\.(gz|Z)$/ ? "|gzip -dc #{arg}" : arg
}
for file in ARGV
  fh = open(file)
  while fh.gets() do
    # .......
  end
end
#-----------------------------
ARGV.map! { |arg|
  arg =~ %r{^\w+://} ? "|GET #{arg}" : arg
}
for file in ARGV
  fh = open(file)
  while fh.gets() do
    # .......
  end
end
#-----------------------------
pwdinfo = (`domainname` =~ /^(\(none\))?$/) ? '/etc/passwd' : '|ypcat passwd'
pwd = open(pwdinfo)
#-----------------------------
puts "File, please? "
file = gets().chomp()
fh = open(file)


# @@PLEAC@@_16.7
output = `cmd 2>&1` # with backticks
# or
ph = open("|cmd 2>&1") # with an open pipe
while ph.gets() do end # plus a read
#-----------------------------
output = `cmd 2>/dev/null` # with backticks
# or
ph = open("|cmd 2>/dev/null") # with an open pipe
while ph.gets() do end # plus a read
#-----------------------------
output = `cmd 2>&1 1>/dev/null` # with backticks
# or
ph = open("|cmd 2>&1 1>/dev/null") # with an open pipe
while ph.gets() do end # plus a read
#-----------------------------
output = `cmd 3>&1 1>&2 2>&3 3>&-` # with backticks
# or
ph = open("|cmd 3>&1 1>&2 2>&3 3>&-") # with an open pipe
while ph.gets() do end # plus a read
#-----------------------------
system("program args 1>/tmp/program.stdout 2>/tmp/program.stderr")
#-----------------------------
output = `cmd 3>&1 1>&2 2>&3 3>&-`
#-----------------------------
# what the redirections above do, written as assignments
fd3 = fd1
fd1 = fd2
fd2 = fd3
fd3 = nil
#-----------------------------
system("prog args 1>tmpfile 2>&1")
system("prog args 2>&1 1>tmpfile")
#-----------------------------
# system ("prog args 1>tmpfile 2>&1")
fd1 = "tmpfile" # change stdout destination first
fd2 = fd1 # now point stderr there, too
#-----------------------------
# system("prog args 2>&1 1>tmpfile")
fd2 = fd1 # stderr same destination as stdout
fd1 = "tmpfile" # but change stdout destination
#-----------------------------
# It is often better not to rely on the shell,
# because of portability, possible security problems
# and bigger resource usage. So, it is often better to use the open3 library.
# See below for an example.
+# opening stdin, stdout, stderr +require "open3" +stdin, stdout, stderr = Open3.popen('cmd') + + +# @@PLEAC@@_16.8 +#----------------------------- +# Contrary to perl, we don't need to use a module in Ruby +fh = Kernel.open("|" + program, "w+") +fh.puts "here's your input\n" +output = fh.gets() +fh.close() +#----------------------------- +Kernel.open("|program"),"w+") # RIGHT ! +#----------------------------- +# Ruby has already object methods for I/O handles +#----------------------------- +begin + fh = Kernel.open("|" + program_and_options, "w+") +rescue + if ($@ ~= /^open/) + $stderr.puts "open failed : #{$!} \n #{$@} \n" + break + end + raise # reraise unforseen exception +end + + +# @@PLEAC@@_16.13 +#% kill -l +#HUP INT QUIT ILL TRAP ABRT BUS FPE KILL USR1 SEGV USR2 PIPE +#ALRM TERM CHLD CONT STOP TSTP TTIN TTOU URG XCPU XFSZ VTALRM +#PROF WINCH POLL PWR +#----------------------------- +#% ruby -e 'puts Signal.list.keys.join(" ")' +#PWR USR1 BUS USR2 TERM SEGV KILL POLL STOP SYS TRAP IOT HUP INT # +#WINCH XCPU TTIN CLD TSTP FPE IO TTOU PROF CHLD CONT PIPE ABRT +#VTALRM QUIT ILL XFSZ URG ALRM +#----------------------------- +# After that, the perl script create an hash equivalent to Signal.list, +# and an array. The array can be obtained by : +signame = [] +Signal.list.each { |name, i| signame[i] = name } + + +# @@PLEAC@@_16.14 +Process.kill(9, pid) # send $pid a signal 9 +Process.kill(-1, Process.getpgrp()) # send whole job a signal 1 +Process.kill("USR1", $$) # send myself a SIGUSR1 +Process.kill("HUP", pid1, pid2, pid3) # send a SIGHUP to processes in @pids +#----------------------------- +begin + Process.kill(0, minion) + puts "#{minion} is alive!" 
+rescue Errno::EPERM # changed uid + puts "#{minion} has escaped my control!"; +rescue Errno::ESRCH + puts "#{minion} is deceased."; # or zombied +rescue + puts "Odd; I couldn't check the status of #{minion} : #{$!}" +end + + +# @@PLEAC@@_16.15 +Kernel.trap("QUIT", got_sig_quit) # got_sig_quit = Proc.new { puts "Quit\n" } +trap("PIPE", "got_sig_quit") # def got_sig_pipe ... +trap("INT") { ouch++ } # increment ouch for every SIGINT +#----------------------------- +trap("INT", "IGNORE") # ignore the signal INT +#----------------------------- +trap("STOP", "DEFAULT") # restore default STOP signal handling + + +# @@PLEAC@@_16.16 +# the signal handler +def ding + trap("INT", "ding") + puts "\aEnter your name!" +end + +# prompt for name, overriding SIGINT +def get_name + save = trap("INT", "ding") + + puts "Kindly Stranger, please enter your name: " + name = gets().chomp() + trap("INT", save) + name +end + + +# @@PLEAC@@_16.21 +# implemented thanks to http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-talk/1760 +require 'timeout' + +# we'll do something vastly more useful than cookbook to demonstrate timeouts +begin + timeout(5) { + waitsec = rand(10) + puts "Let's see if a sleep of #{waitsec} seconds is longer than 5 seconds..." + system("sleep #{waitsec}") + } + puts "Timeout didn't occur" +rescue Timeout::Error + puts "Timed out!" +end + + +# @@PLEAC@@_17.1 +# A basic TCP client connection +require 'socket' +begin + t = TCPSocket.new('www.ruby-lang.org', 'www') +rescue + puts "error: #{$!}" +else + # ... 
do something with the socket + t.print "GET / HTTP/1.0\n\n" + answer = t.gets(nil) + # and terminate the connection when we're done + t.close +end + +# Using the evil low level socket API +require 'socket' +# create a socket +s = Socket.new(Socket::AF_INET, Socket::SOCK_STREAM, 0) +# build the address of the remote machine +sockaddr_server = [Socket::AF_INET, 80, + Socket.gethostbyname('www.ruby-lang.org')[3], + 0, 0].pack("snA4NN") +# connect +begin + s.connect(sockaddr_server) +rescue + puts "error: #{$!}" +else + # ... do something with the socket + s.print "GET / HTTP/1.0\n\n" + # and terminate the connection when we're done + s.close +end + +# TCP connection with management of error (DNS) +require 'socket' +begin + client = TCPSocket.new('does not exists', 'www') +rescue + puts "error: #{$!}" +end + +# TCP connection with a time out +require 'socket' +require 'timeout' +begin + timeout(1) do #the server has one second to answer + client = TCPSocket.new('www.host.com', 'www') + end +rescue + puts "error: #{$!}" +end + + +# @@PLEAC@@_17.12 +require 'socket' + +class Preforker + attr_reader (:child_count) + + def initialize(prefork, max_clients_per_child, port, client_handler) + @prefork = prefork + @max_clients_per_child = max_clients_per_child + @port = port + @child_count = 0 + + @reaper = proc { + trap('CHLD', @reaper) + pid = Process.wait + @child_count -= 1 + } + + @huntsman = proc { + trap('CHLD', 'IGNORE') + trap('INT', 'IGNORE') + Process.kill('INT', 0) + exit + } + + @client_handler=client_handler + end + + def child_handler + trap('INT', 'EXIT') + @client_handler.setUp + # wish: sigprocmask UNblock SIGINT + @max_clients_per_child.times { + client = @server.accept or break + @client_handler.handle_request(client) + client.close + } + @client_handler.tearDown + end + + def make_new_child + # wish: sigprocmask block SIGINT + @child_count += 1 + pid = fork do + child_handler + end + # wish: sigprocmask UNblock SIGINT + end + + def run + @server = 
TCPserver.open(@port) + trap('CHLD', @reaper) + trap('INT', @huntsman) + loop { + (@prefork - @child_count).times { |i| + make_new_child + } + sleep .1 + } + end +end + +#----------------------------- +#!/usr/bin/ruby + +require 'Preforker' + +class ClientHandler + def setUp + end + + def tearDown + end + + def handle_request(client) + # do stuff + end +end + +server = Preforker.new(1, 100, 3102, ClientHandler.new) +server.run + + +# @@PLEAC@@_18.2 +require 'net/ftp' + +begin + ftp = Net::FTP::new("ftp.host.com") + ftp.login(username,password) + ftp.chdir(directory) + ftp.get(filename) + ftp.put(filename) +rescue Net::FTPError + $stderr.print "FTP failed: " + $! +ensure + ftp.close() if ftp +end + +# A better solution for a local use could be : +Net::FTP::new("ftp.host.com") do |ftp| + ftp.login(username,password) + ftp.chdir(directory) + ftp.get(filename) + ftp.put(filename) +end + +# If you have only one file to get, there is a simple solution : +require 'open-uri' +open("ftp://www.ruby-lang.org/path/filename") do |fh| + # read from filehandle fh +end +#-------------------------------------------- +# to wait a defined time for the connection, +# use the timeout module +require 'timeout' +begin + timeout(30){ + ftp = Net::FTP::new("ftp.host.com") + ftp.debug_mode = true + } +rescue Net::FTPError + $stderr.puts "Couldn't connect." +rescue Timeout::Error + $stderr.puts "Timeout while connecting to server." +end + +begin + ftp.login() +rescue Net::FTPError + $stderr.print "Couldn't authentificate.\n" +end + +begin + ftp.login(username) +rescue Net::FTPError + $stderr.print "Still couldn't authenticate.\n" +end + +begin + ftp.login(username, password) +rescue Net::FTPError + $stderr.print "Couldn't authenticate, even with explicit + username and password.\n" +end + +begin + ftp.login(username, password, account) +rescue Net::FTPError + $stderr.print "No dice. 
It hates me.\n" +end +#----------------------------- +ftp.put(localfile, remotefile) +#----------------------------- +# Sending data from STDIN is not directly supported +# by the ftp library module. A possible way to do it is to use the +# storlines method directly to send raw commands to the ftp server. +#----------------------------- +ftp.get(remotefile, localfile) +#----------------------------- +ftp.get(remotefile) { |data| puts data } +#----------------------------- +ftp.chdir("/pub/ruby") +print "I'm in the directory ", ftp.pwd(), "\n" +#----------------------------- +ftp.mkdir("/pub/ruby/new_dir") +#----------------------------- +lines = ftp.ls("/pub/ruby/") +# => ["drwxr-xr-x 2 matz users 4096 July 17 1998 1.0", ... ] + +latest = ftp.dir("/pub/ruby/*.tgz").sort.last + +ftp.nlst("/pub/ruby") +# => ["/pub/ruby/1.0", ... ] +#----------------------------- +ftp.quit() + + +# @@PLEAC@@_18.6 +require 'net/telnet' +t = Net::Telnet::new( "Timeout" => 10, + "Prompt" => /%/, + "Host" => host ) +t.login(username, password) +files = t.cmd("ls") +t.print("top") +process_string = t.waitfor(/\d+ processes/) +t.close +#----------------------------- +/[$%#>] \z/n +#----------------------------- +# In case of an error, the telnet module throws an exception. +# For control of the behavior in case of an error, +# you just need to catch the exceptions and do your custom +# error handling. 
+#----------------------------- +begin + telnet.login(username, password) +rescue TimeoutError + fail "Login failed !\n" +end +#----------------------------- +telnet.waitfor('/--more--/') +#----------------------------- +telnet.waitfor(String => 'greasy smoke', Timeout => 30) + + +# @@PLEAC@@_18.7 +require 'ping' + +puts "#{host} is alive.\n" if Ping.pingecho(host); +#----------------------------- +# the ping module only use TCP ping, not ICMP even if we are root +if Ping.pingecho("kingkong.com") + puts "The giant ape lives!\n"; +else + puts "All hail mighty Gamera, friend of children!\n"; +end + + +# @@PLEAC@@_19.1 +#!/usr/local/bin/ruby -w +# hiweb - load CGI class to decode information given by web server + +require 'cgi' + +cgi = CGI.new('html3') + +# get a parameter from a form +value = cgi.params['PARAM_NAME'][0] + +# output a document +cgi.out { + cgi.html { + cgi.head { cgi.title { "Howdy there!" } } + + cgi.body { cgi.p { "You typed: " + cgi.tt { + CGI.escapeHTML(value) } } } + } +} + +require 'cgi' +cgi = CGI.new +who = cgi.param["Name"][0] # first param in list +phone = cgi.param["Number"][0] +picks = cgi.param["Choices"] # complete list + +print cgi.header( 'type' => 'text/plain', + 'expires' => Time.now + (3 * 24 * 60 * 60) ) + + +# @@PLEAC@@_19.3 +#!/usr/local/bin/ruby -w +# webwhoami - show web user's id +require 'etc' +print "Content-Type: text/plain\n\n" +print "Running as " + Etc.getpwuid.name + "\n" + +# % ruby -wc cgi-script # just check syntax + +# % ruby -w cgi-script # params from stdin +# (offline mode: enter name=value pairs on standard input) +# name=joe +# number=10 +# ^D + +# % ruby -w cgi-script name=joe number=10 # run with mock form input +# % ruby -d cgi-script name=joe number=10 # ditto, under the debugger + +# POST method script in csh +# % (setenv HTTP_METHOD POST; ruby -w cgi-script name=joe number=10) +# POST method script in sh +# % HTTP_METHOD=POST perl -w cgi-script name=joe number=10 + + +# @@PLEAC@@_19.4 +# ruby has several 
security levels, the level "1" is similar to perls taint mode. +# It can be switched on by providing the -T command line parameter +# or by setting $SAFE to 1. Setting $SAFE to 2,3 or 4 restricts possible +# harmful operations further. + +#!/usr/bin/ruby -T +$SAFE = 1 +File.open(ARGV[0], "w") +# ruby warns with: +# taint1.rb:2:in `initialize': Insecure operation - initialize (SecurityError) + +$SAFE = 1 +file = ARGV[0] +unless /^([\w.-]+)$/.match(file) + raise "filename #{file} has invalid characters" +end +file = $1 +# In ruby, even the back reference from a regular expression stays tainted. +# you need to explicitly untaint the variable: +file.untaint +File.open(file, "w") + +# Race condition exists like in perl: +unless File.exists(filename) # Wrong because of race condition + File.open(filename, "w") +end + + + +# @@PLEAC@@_19.10 +preference_value = cgi.cookies["preference name"][0] + +packed_cookie = CGI::Cookie.new("name" => "preference name", + "value" => "whatever you'd like", + "expires" => Time.local(Time.now.year + 2, + Time.now.mon, Time.now.day, Time.now.hour, Time.now.min, Time.now.sec) ) + +cgi.header("cookie" => [packed_cookie]) + +#!/usr/local/bin/ruby -w +# ic_cookies - sample CGI script that uses a cookie +require 'cgi' + +cgi = CGI.new('html3') + +cookname = "favorite ice cream" +favorite = cgi.params["flavor"][0] +tasty = cgi.cookies[cookname][0] || 'mint' + +unless favorite + cgi.out { + cgi.html { + cgi.head { cgi.title { "Ice Cookies" } } + + cgi.body { + cgi.h1 { "Hello Ice Cream" } + + cgi.hr + + cgi.form { + cgi.p { "Please select a flavor: " + + cgi.text_field("flavor", tasty ) } + } + + cgi.hr + } + } + } +else + cookie = CGI::Cookie.new( "name" => cookname, + "value" => favorite, + "expires" => Time.local(Time.now.year + 2, +Time.now.mon, Time.now.day, Time.now.hour, Time.now.min, Time.now.sec) ) + cgi.out("cookie" => [cookie]) { + cgi.html { + cgi.head { cgi.title { "Ice Cookies" } } + + cgi.body { + cgi.h1 { "Hello Ice Cream" } + + 
cgi.p { "You chose as your favorite flavor `#{favorite}'." } + } + } + } +end + + +# @@PLEAC@@_20.9 +def templatefile(filename, fillings) + aFile = File.new(filename, "r") + text = aFile.read() + aFile.close() + pattern = Regexp.new('%%(.*?)%%') + text.gsub!(pattern) { + fillings[$1] || "" + } + text +end + +fields = { + 'username' => whats_his_name, + 'count' => login_count, + 'total' => minutes_used +} +puts templatefile('simple.template', fields) + +# @@INCOMPLETE@@ +# An example using databases is missing + diff --git a/bench/example.rubylex b/bench/example.rubylex Binary files differnew file mode 100644 index 0000000..b3cdfbe --- /dev/null +++ b/bench/example.rubylex diff --git a/bench/strange.c b/bench/strange.c new file mode 100644 index 0000000..42f339d --- /dev/null +++ b/bench/strange.c @@ -0,0 +1,90 @@ +#include "ruby.h" /*
+ /sLaSh *
+ oBfUsCaTeD RuBy *
+ cOpYrIgHt 2005 *
+bY SiMoN StRaNdGaArD *
+ #{X=320;Y=200;Z=20} */
+
+#define GUN1 42:
+#define GUN2 43:
+#define bo do
+#define when(gun) /**/
+#define DATA "p 'Hello embedded world'"
+#define DIRTY(argc,argv)\
+argc,argv,char=eval(\
+"#{DATA.read}\n[3,2,1]"\
+);sun=O.new\
+if(0)
+
+int
+sun[]={12,9,16,9,2,1,7,1,3,9,27,4, 13,2,11,5,4,1,25,
+5,0,1,14,9,15,4,26,9,23,2,17,6,31, 6,10,8,22,9,21,1,
+24,8,20,8,18,9,29,5,9,5,1,1,28,8,8,1,30, 9,6,8, 5,1,
+19,9,36,19,43, 9,34,11,50,19,48,18,49,9, 35,8,42,18,
+51,8,44,11,32, 11,47,9,37,1,39,9,38,19, 45,8,40,12,
+41,9,46,12,33,1,57,1,85,5,88,28,83,4,87, 6,62,28,89,
+9,80,28,60,21,52,21,72,29,54,21,75,8,70,29,58,28,65,
+9,91,8,74,29,79,2,77,1,53,1,81,5, 69,2,64,21, 86,29,
+67,9,59,1,61,5,73,6,76,28,56,21,68,29,78,29,63,5,66,
+28,90,29, 71,4,55,9,84,28,82,29,101,5, 103,9, 98,35,
+97,1,94,35,93,1,100,35,92,31,99,5,96,39,95,5,102,35};
+
+void run(int gun=0) { // [gun]=[:GUN1,:GUN2]
+ printf("run() %i\n", gun);
+ switch(gun) {
+ case GUN1 when(2)
+ printf("when2\n");
+ break; // end
+ case GUN2 when(3)
+ printf("when3\n");
+ break; // end
+ }
+}
+
+int main(int argc, char** argv) {
+ printf("hello world. number of arguments=%i\n", argc);
+ int fun=5;
+ bo {
+ fun -= 1; //.id - gun = fun
+ run(fun);
+ } while(fun>0);
+ ruby_init();
+ rb_eval_string(DATA);
+ return 0;
+}
+
+# if 0 // nobody reads un-defined code
+def goto*s;$s=[];Y.times{s=[];X.times{s<<[0]*3};$s<< s}end;A=0.5
+include Math;def u g,h,i,j,k,l;f,*m=((j-h).abs>(k-i).abs)?[proc{
+|n,o| g[o] [n ]=l },[h ,i ],[j,k]]:[proc{
+|p,q| g[ p][ q] =l} ,[ i,h ], [k,j]];b,a=m.sort
+c,d=a [1 ]-b [1 ],a [0 ]-b [0 ];d.times{|e|f.
+call( e+b[ 0] ,c* e/d+b [1])};end;V=0;def bo&u
+$u||= V; ;$u += 1+V ;; return u.call if$u>1;q=128.0
+;x=(V .. 255 ). map {| y|f1,z =sin(y.to_f*PI/q),
+sin(( y. to_f + 200 )*PI/( q));[(f1*30.0+110.0).
+to_i,((f1+z)*10.0+40.0).to_i,(z*20.0+120.0).to_i]};Y.times{|i|X.
+times{|j|i1=((i*0.3+150)*(j*1.1+50)/50.0).to_i;i2=((i*0.8+510)*(
+j*0.9+1060)/51.0).to_i;$s[i][j]=x[(i1*i2)%255].clone}};$a=(0..25).
+inject([]){|a,i|a<<(V..3).inject([]){|r,j|r<<$c[i*4+j]}};u.call;end
+I=LocalJumpError;def run*a,&b;return if a.size==V;if a[V]==666;$b=b
+elsif$b;$b.call;end;end;def main s,&u;$m=V;u.call rescue I;end
+def rb_eval_string(*a);end # you promised not to look here
+def ruby_init;q=2.0;l=((X**q)*A+(Y**q)*A)**A;V.upto(Y-4){|s|V.
+upto(X-4){|q|d=((q-X/A)**q+(s-Y/A)**q)**A;e=(cos(d*PI/(l/q))/q
++A)*3.0+1.0;v=2;f=v/e;a,p,b=$s[s],$s[s+1],$s[s+v];r=a[q][V]*e+
+p[q][V]+a[q+1][V]+b[q][V]+a[q+v][V]+b[q+v/v][V]+p[q+v][V]+b[q+
+v][V]*f;g=[a[q][V],b[q][V],a[q+v][V],b[q+v][V]];h=(g.max-g.min
+)*f;$s[s][q][V]=[[(r/(e+f+6.0)+A+(h*0.4)).to_i,255].min,V].max
+}};File.open("res.ppm","w+"){|f|f.write(# secret.greetings :-)
+"P3\n# res.ppm\n#{X} #{Y}\n255\n"+$s.map{|a|a.map{|b|b.join' '
+}.join(' ')+"\n"}.join)};end;def switch i,&b;b.call;return unless
+defined?($m);b=(X*0.01).to_i;d=1.0/40.0;e=0.09;c=(Y*0.01).to_i
+a=$a.map{|(f,g,h,j)|[f*d,g*e,h*d,j*e]};a.each{|(k,l,m,n)|u($s,(k*X
+).to_i+b+i,(l*Y).to_i+c+i,(m*X).to_i+b+i,(n*Y).to_i+c+i,[Z]*3)}
+a.each{|(o,q,r,s)|u($s,(o*(X-Z)).to_i+i,(q*(Y-Z)).to_i+i,(r*(X-
+Z)).to_i+i,(s*(Y-Z)).to_i+i,[(1<<8)-1]*3)};end;Q=Object;class
+Regexp;def []=(v,is);is.each{|s|Q.send(:remove_const,s)if Q.
+const_defined? s;Q.const_set(s,v)};end;end;def int*ptr;666
+end;class O;def []=(a,b=nil);$c=a;end;end;alias:void:goto
+#endif // pretend as if you havn't seen anything
diff --git a/bench/strange.ruby b/bench/strange.ruby new file mode 100644 index 0000000..6ff93ee --- /dev/null +++ b/bench/strange.ruby @@ -0,0 +1,328 @@ +a.each{|el|anz[el]=anz[el]?anz[el]+1:1}
+while x<10000
+#a bis f dienen dazu die Nachbarschaft festzulegen. Man stelle sich die #Zahl von 1 bis 64 im Binärcode vor 1 bedeutet an 0 aus
+ b=(p[x]%32)/16<1 ? 0 : 1
+
+ (x-102>=0? n[x-102].to_i : 0)*a+(x-101>=0?n[x-101].to_i : 0)*e+n[x-100].to_i+(x-99>=0? n[x-99].to_i : 0)*f+(x-98>=0? n[x-98].to_i : 0)*a+
+ n[x+199].to_i*b+n[x+200].to_i*d+n[x+201].to_i*b
+
+#und die Ausgabe folgt
+g=%w{}
+x=0
+
+while x<100
+ puts"#{g[x]}"
+ x+=1
+end
+
+puts""
+sleep(10)
+
+1E1E1
+puts 30.send(:/, 5) # prints 6
+
+"instance variables can be #@included, #@@class_variables\n and #$globals as well."
+`instance variables can be #@included, #@@class_variables\n and #$globals as well.`
+'instance variables can be #@included, #@@class_variables\n and #$globals as well.'
+/instance variables can be #@included, #@@class_variables\n and #$globals as well./mousenix
+:"instance variables can be #@included, #@@class_variables\n and #$globals as well."
+:'instance variables can be #@included, #@@class_variables\n and #$globals as well.'
+%'instance variables can be #@included, #@@class_variables\n and #$globals as well.'
+%q'instance variables can be #@included, #@@class_variables\n and #$globals as well.'
+%Q'instance variables can be #@included, #@@class_variables\n and #$globals as well.'
+%w'instance variables can be #@included, #@@class_variables\n and #$globals as well.'
+%W'instance variables can be #@included, #@@class_variables\n and #$globals as well.'
+%s'instance variables can be #@included, #@@class_variables\n and #$globals as well.'
+%r'instance variables can be #@included, #@@class_variables\n and #$globals as well.'
+%x'instance variables can be #@included, #@@class_variables\n and #$globals as well.'
+
+#%W[ but #@0illegal_values look strange.]
+
+%s#ruby allows strange#{constructs}
+%s#ruby allows strange#$constructs
+%s#ruby allows strange#@@constructs
+
+%r\VERY STRANGE!\x00
+%x\VERY STRANGE!\x00
+
+~%r#<XMP>#i .. ~%r#</XMP>#i;
+
+a = <<"EOF"
+This is a multiline #$here document
+terminated by EOF on a line by itself
+EOF
+
+a = <<'EOF'
+This is a multiline #$here document
+terminated by EOF on a line by itself
+EOF
+
+b=(p[x] %32)/16<1 ? 0 : 1
+
+#<<""
+<<"X"
+#{test}
+#@bla
+#die suppe!!!
+\xfffff
+
+
+super <<-EOE % [
+ EOE
+
+<<X
+X
+X
+%s(uninter\)pre\ted)
+%q(uninter\)pre\ted)
+%Q(inter\)pre\ted)
+:"inter\)pre\ted"
+:'uninter\'pre\ted'
+
+%q[haha! [nesting [rocks] ] ! ]
+
+%Q[hehe! #{ %Q]nesting #{"really"} rocks] } ! ]
+
+"but it #{<<may} break"
+the code.
+may
+
+# this is a known bug.
+p <<this
+but it may break #{<<that}
+code.
+that
+this
+that
+
+##################################################################
+class NP
+def initialize a=@p=[], b=@b=[]; end
+def +@;@b<<1;b2c end;def-@;@b<<0;b2c end
+def b2c;if @b.size==8;c=0;@b.each{|b|c<<=1;c|=b};send(
+ 'lave'.reverse,(@p.join))if c==0;@p<<c.chr;@b=[] end
+ self end end ; begin _ = NP.new end
+c
+# ^ This is a bug :(
+
+# The Programming Language `NegaPosi'
++-+--++----+--+-+++--+-------+--++--+++---+-+++-+-+-+++-----+++-_
++--++++--+---++-+-+-+++--+--+-+------+--++++-++---++-++---++-++-_
++++--++-+-+--++--+++--+------+----+--++--+++-++-+----++------+--_
+-+-+----+++--+--+----+--+--+-++-++--+++-++++-++-----+-+-+----++-_
+---------+-+---- _
+##################################################################
+
+
+# date: 03/18/2004
+# title: primes less than 1000 ( 2005 Obfuscated Ruby Contest )
+# author: Jim Lawless
+# email: jimbo at radiks dotski net
+# comments: This program will display all positive prime integers
+# less than 1000. Program licens is the same as the Ruby
+# license ( http://www.ruby-lang.org/en/LICENSE.txt )
+
+ $e=""
+
+def a()
+ $a=$a+1
+end
+
+def b()
+ $a=$a+5
+end
+
+def c()
+ $e=$e+$a.chr
+end
+
+def d()
+ $a=10
+end
+
+def e()
+ $a=$a+16
+end
+
+d;e;b;a;a;a;a;a;c;d;e;e;e;e;e;e;b;a;a;a;a;c;d;e;e;e;a;a;a;c;d;e;e;b;b;
+a;c;d;c;d;e;e;e;e;e;e;b;b;a;a;a;c;d;e;e;e;e;e;b;b;a;a;a;a;c;d;e;e;e;e;
+e;b;b;a;a;a;a;a;c;d;e;e;e;e;e;e;a;a;c;d;e;e;e;e;e;b;b;a;c;d;e;b;a;c;
+d;e;b;a;a;a;a;a;c;d;e;e;e;e;e;e;b;a;a;a;a;c;d;e;e;e;a;a;c;d;e;e;b;a;a;
+c;d;e;e;b;a;c;d;e;e;b;a;c;d;e;e;b;a;c;d;c;d;e;b;a;a;a;a;a;c;d;e;e;e;e;e;b;
+b;a;a;a;a;a;c;d;e;e;e;a;a;a;c;d;e;e;b;a;a;a;c;d;c;d;e;b;a;a;a;a;a;c;d;e;
+e;e;e;e;b;b;a;a;c;d;e;e;e;e;e;e;a;a;a;a;a;c;d;e;e;e;e;e;e;b;b;a;c;
+d;e;e;e;e;e;e;a;a;a;a;c;d;e;e;e;e;e;b;a;a;a;a;a;c;d;e;e;e;a;a;a;c;d;e;
+e;b;a;c;d;c;d;e;e;e;e;e;e;b;b;a;a;a;c;d;e;e;e;e;e;b;b;a;a;a;a;c;d;e;e;
+e;e;e;b;b;a;a;a;a;a;c;d;e;e;e;e;e;e;a;a;c;d;e;e;e;e;e;b;b;a;c;d;e;b;
+a;c;d;e;b;a;a;a;a;a;c;d;e;e;e;e;e;b;b;a;a;a;a;a;c;d;e;e;e;a;a;c;d;e;b;
+a;a;a;a;a;c;d;e;e;e;e;e;e;b;a;a;a;a;c;d;e;b;a;c;d;c;d;e;e;e;e;e;b;b;a;
+a;a;a;a;c;d;e;e;e;e;e;b;b;a;a;c;d;e;b;b;a;a;a;a;c;d;e;b;a;c;d;e;b;b;a;
+a;a;a;c;d;e;b;a;a;a;a;a;c;d;e;e;e;e;e;e;b;a;a;a;a;c;d;e;e;a;a;a;a;c;
+d;e;e;e;e;e;e;a;a;a;c;d;e;e;e;e;e;e;a;a;a;a;a;c;d;e;e;e;e;e;b;a;a;a;
+a;a;c;d;e;e;e;e;e;e;b;b;a;c;d;e;e;e;e;e;e;a;a;c;d;e;e;e;e;e;e;a;a;a;
+a;a;c;d;e;b;b;a;a;a;a;c;d;e;b;a;a;a;a;a;c;d;e;e;e;e;e;b;b;a;a;a;a;a;
+c;d;e;b;b;a;a;a;a;a;c;d;e;b;b;a;a;a;a;a;c;d;e;e;e;a;a;a;c;d;e;e;e;a;a;
+a;c;d;e;e;b;a;c;d;e;b;b;a;a;a;a;a;c;d;e;b;a;c;d;c;d;e;b;a;a;a;a;a;c;d;e;e;
+e;e;e;b;b;a;a;c;d;e;e;e;e;e;e;a;a;a;a;a;c;d;e;e;e;e;e;e;b;b;a;c;d;e;
+e;e;e;e;e;a;a;a;a;c;d;e;e;e;e;e;b;a;a;a;a;a;c;d;e;e;e;a;a;a;c;d;e;e;
+b;a;a;c;d;c;d;e;e;e;e;e;b;a;a;a;c;d;e;e;e;e;e;e;b;a;a;a;c;d;e;e;e;e;e;
+b;b;a;c;d;e;e;e;e;e;b;a;a;c;d;e;e;e;e;e;e;a;c;d;c;d;e;e;e;e;e;b;b;a;c;
+d;e;e;e;e;e;e;a;a;a;a;c;d;e;e;e;e;e;b;a;a;a;a;a;c;d;c;d;e;b;a;a;a;a;a;
+c;d;e;e;e;e;e;b;b;a;a;a;a;a;c;d;e;e;e;a;a;a;c;d;e;b;a;a;a;a;a;c;d;e;e;
+e;e;e;b;b;a;a;a;a;a;c;d;e;e;a;c;d;e;e;b;a;a;c;d;c;d;e;e;e;e;e;b;b;a;a;
+a;a;a;c;d;e;e;e;e;e;b;b;a;a;c;d;e;b;a;c;d;e;b;b;a;a;a;a;c;d;e;b;b;a;a;
+a;a;c;d;e;b;a;a;a;a;a;c;d;e;e;e;e;e;b;b;a;a;a;a;a;c;d;e;b;b;b;a;c;d;e;
+b;a;a;a;a;a;c;d;e;e;e;e;e;b;b;a;a;a;a;a;c;d;e;b;b;a;a;a;a;a;c;d;e;e;
+e;a;a;a;a;c;d;e;b;a;a;a;a;a;c;d;e;e;e;e;e;e;b;a;a;a;a;c;d;e;b;b;a;a;
+a;a;a;c;d;c;d;e;e;e;e;e;b;a;a;a;c;d;e;e;e;e;e;e;b;a;a;a;c;d;e;e;e;e;e;
+b;b;a;c;d;e;e;e;e;e;b;a;a;c;d;e;e;e;e;e;e;a;c;d;c;d;e;e;e;e;e;b;b;a;c;
+d;e;e;e;e;e;e;a;a;a;a;c;d;e;e;e;e;e;b;a;a;a;a;a;c;d;c;d;e;e;e;e;e;b;b;
+a;c;d;e;e;e;e;e;e;a;a;a;a;c;d;e;e;e;e;e;b;a;a;a;a;a;c;d;c;d;e;e;e;e;e;
+b;b;a;a;a;a;a;c;d;e;e;e;e;e;b;b;a;a;c;d;e;b;b;a;a;a;a;c;d;e;b;a;a;a;
+a;a;c;d;e;e;e;e;e;b;b;a;a;c;d;e;e;e;e;e;e;a;a;a;a;a;c;d;e;e;e;e;e;e;
+b;b;a;c;d;e;e;e;e;e;e;a;a;a;a;c;d;e;e;e;e;e;b;a;a;a;a;a;c;d;e;e;e;a;
+a;a;c;d;e;e;e;a;a;a;c;d;e;e;b;a;c;d;e;b;b;a;a;a;a;a;c;d;e;b;a;c;d;c;d;e;e;
+e;e;e;e;b;a;c;d;e;e;e;e;e;e;b;b;a;c;d;e;e;e;e;e;e;b;a;a;a;a;a;c;d;e;
+e;e;e;e;e;b;a;a;a;a;c;d;e;b;a;c;d;e;b;a;a;a;c;d;e;b;a;a;a;a;c;d;e;e;e;
+e;e;e;e;a;c;d;e;b;a;a;a;a;a;c;d;e;e;e;e;e;e;b;a;a;a;a;c;d;e;e;e;e;e;
+e;e;a;a;a;c;d;e;b;a;c;d;e;e;e;e;e;b;b;a;a;a;a;a;c;d;e;e;e;e;e;e;b;a;
+a;a;a;c;d;e;b;a;c;d;e;e;e;e;e;e;b;a;c;d;e;e;e;e;e;e;b;a;a;a;c;d;e;e;e;
+e;e;b;b;a;a;a;a;a;c;d;e;e;e;e;e;e;a;a;a;c;d;e;e;e;e;e;b;b;a;c;d;e;b;
+a;a;a;c;d;c;d;e;e;e;e;e;b;b;a;c;d;e;e;e;e;e;e;a;a;a;a;c;d;e;e;e;e;e;b;
+a;a;a;a;a;c;d;c;d;e;b;a;a;a;a;a;c;d;e;e;e;e;e;e;b;a;a;a;a;c;d;e;b;a;c;
+d;e;e;e;a;a;a;c;d;e;b;a;c;d;e;b;a;a;a;a;a;c;d;e;e;e;e;e;e;b;a;a;a;a;c;
+d;e;b;a;c;d;e;e;a;c;d;e;b;a;c;d;e;e;b;a;a;c;d;c;d;e;e;e;e;e;b;b;a;c;d;e;e;e;
+e;e;e;a;a;a;a;c;d;e;e;e;e;e;b;a;a;a;a;a;c;d;c;d;e;b;a;c;d;e;b;a;c;d;e;b;
+a;c;d;e;b;a;c;d;e;b;a;c;d;e;b;a;c;d;e;b;a;c;eval $e
+
+$_=%{q,l= %w{Ruby\\ Quiz Loader}
+n,p,a= "\#{q.do#{%w{w a n c}.sort{|o,t|t<=>o}}se.d\x65l\x65t\x65(' ')}.com/",
+{"bmJzcA==\n".\x75np\x61ck("m")[0]=>" ","bHQ=\n".\x75np\x61ck((?n-1).chr)[0]=>
+:<,"Z3Q=\n".\x75np\x61ck("m")[0]=>:>,"YW1w\n".\x75np\x61ck((?l+1).chr)[0]=>:&},
+[[/^\\s+<\\/div>.+/m,""],[/^\\s+/,""],[/\n/,"\n\n"],[/<br \\/>/,"\n"],
+[/<hr \\/>/,"-="*40],[/<[^>]+>/,""],[/^ruby/,""],[/\n{3,}/,"\n\n"]];p\165ts"
+\#{l[0..-3]}ing...\n\n";send(Kernel.methods.find_all{|x|x[0]==?e}[-1],
+"re\#{q[5...8].downcase}re '111112101110-117114105'.scan(/-|\\\\d{3}/).
+inject(''){|m,v|v.length>1?m+v.to_i.chr: m+v}");o#{%w{e P}.sort.join.downcase
+}n("http://www.\#{n}"){|w|$F=w.read.sc\x61n(/li>.+?"([^"]+)..([^<]+)/)};\160uts\
+"\#{q}\n\n";$F.\145\141ch{|e|i=e[0][/\\d+/];s="%2s. %s"%[i,e[1]];i.to_i%2==0 ?
+\160ut\x73(s) : #{%w{s p}[-1]}rint("%-38s "%s)};p\x72\x69\x6et"\n? ";e\x76al(
+['puts"\n\#{l[0..3]}ing...\n\n"','$c=gets.chomp.to_i'].sort.join(";"));#{111.chr
+}pen("http://www.\#{n}"+$F[$c-1][0]){|n|$_=n.read[/^\\s+<span.+/m];#{('a'.."z").
+to_a[10-5*2]}.e\141ch{|(z,f)|\x67sub!(z,f)};\147sub!(/&(\\w+);/){|y|p.
+ke\171\077($1)?p[$1]:y};while$_=~/([^\n]{81,})/:z=$1.dup;f=$1.dup;f[f.rindex(
+" ",80),1]="\n";f.s\165b!(/\n[ \t]+/,"\n");s\165b!(/\#{R\x65g\x65xp.
+\x65scap\x65(z)}/,f)end};while\040\163ub!(/^(?:[^\n]*\n){20}/, ""):puts"\#$&
+--\x4dO\x52E--";g=$_;g#{"\145"}ts;;#{"excited"[0..4].delete("c")}\040if$_[0]==?q
+$_=g;end;$_.d#{"Internet Service Provider".scan(/[A-Z]/).join.downcase
+}lay};eval$_
+
+ d=[30644250780,9003106878,
+ 30636278846,66641217692,4501790980,
+ 671_24_603036,131_61973916,66_606629_920,
+ 30642677916,30643069058];a,s=[],$*[0]
+ s.each_byte{|b|a<<("%036b"%d[b.
+ chr.to_i]).scan(/\d{6}/)}
+ a.transpose.each{ |a|
+ a.join.each_byte{\
+ |i|print i==49?\
+ ($*[1]||"#")\
+ :32.chr}
+ puts
+ }
+
+#! /usr/bin/env ruby
+# License: If Ruby is licensed to the general public in a certain way, this is also licensed in that way.
+require'zlib';eval(Zlib::Inflate.inflate("x\332\355WKo\333F\020\276\367W\250\262\001\222\tM\357\246M\017\242\211\242h\200\036\212`\201\026\350\205`\f=h\233\301Zt%\273A-2\277\275\363\315\222\334\241,#v\214\366T\331\262\326\303y\3177\263\243M\371\347]\265)\203UuYnoO\257Wo\203\364>[T\353U\265\276L\257\353\325\235-'\277\226\233ui\323Uy1\251\027\027\341\253\371\346r\e\245u\366\216\205f\263\367\357\336&\353\362S\010zr=\277\3315w\315]r[\237o\333\344c]\255#>\343O\025\352\037\334\177\341\367\364\271\t\003\245\337|\027\304\364aM@:\363\260\316>\237\232\323(\326\252(\327\253\t\275\323\332h\253\224V\306d\247\037\362\371\311}\321\314f\356\363C\016\311\342\365\361ij\026\037\313\345\355\3577\363e\231\224\363\345\325y\315\204]\263l\3620\177\317\241\024M\376\263\235o\267Et\222/\223%\037\213\374D\323\373M\3214Kv-\373<\361\026\233&\\\304\253,\354\270\263\314)\232\3748\311\247]z\216v\3136\235\306\323\243\035\262\263\214\332\f\024\342\257\327\345\264\230\205\313o36\3122\254e2\260\236\2610\202\354\037\260\256 (f=/\313:Z\024\245\313\244Zoo\347\353ey~]\336^\325\253-\a\273k\252fqv6\235\333j\276\355\236tV\252\230\377F\276\n\333\277\257\241\345\206\262\323\306G\273\352\340\203t\332\246\2441`'\316\316\266\245\275H\0032\377l\253\017,=42E\002\360\236\246\345_s;Y\274^\305\367Q\233\036\233\276\016\312\2450=\256=\305U\202\230\254\"\222\265\004\217\237~\373\345\017\"h\243\210\307j\235\251\205V8\353\304X\372!1CGc-\251\240\337\020\317\361#\036\023\n\2556\254Cg3\002}\265\356s\235\202K[K\022\020 \243\206\216\241p3\33255\350\232\036\030q$\233\344!\363\204^},$\023Xg\235:\364r1\"1\344\277\261\207\031(\301DE\260\344\026Y\177\345\036\221\204mP\263\266Mk\305\366\210%3\220\302S\322\306IR\316\377!\203 S\336\310\216\215\203\315\002-\211 
5D2\257\210\302\321p\234\364\205\222Jj\220\022E\321h\347\223RQ*94K\022\243\314H`4{LV\003\021N\f\333\364I\347l\327UR\305t\340\332i>\241x=Mu4R\245\373\223\244\251NB\211\247\236\3465\253^bx\332Yc\263\252M\220b\253\220\310\004\331\242\020,`\005T\021Y\251P@\020\365Ax\310z\364\264\240\265vj2\037?0\v\"en\244\374\251\032\225\253v\346\253\3712\215\032\322(o\206~A\006\010\f\324\22357\026\"\316\024\365\021\360@\277:\363.$\f\342\016$\200\v\341\302\230\020\340\341\201K\017\270+i\326-\312\313j\235\n[\376({\330u\254\266\334\034\031\367%:CK\210{\311h\aQH\333Q\023\250\210;e\360\322\362\213\202\247\216\266\340C&(p\274HT7\336&B\352\300\036z\206\204\375 \032z\304\233\217\034\267AK\207R\363\213\324u\334\203\272h\234 \304&\364S\302]|\024\233b\000\023E\034\005\300!\330\2274\026\205\316\363\203\364\"\316\245!\242\360Y?4\204b\023.\2009\036X\300\213p\200]\304\324\200$^\204\025\222D\325X \363\324\004\223\205\207\241M\245\352\341(s\3415\260w\226\313=\2422 \200\177\344\355\211\3350\004\341\217\207\215r%x\030\302\304\230\335{#\250#o\204h\327;\220\242\275B%j&\343e\005\226/\r\200\035\035\206K\243\027\216Z\230\323.\335\356^!\vF\002K\366\246kG\321\364E\301\362\250\275a\f\031\207i%\216\342&ie\205\260\324}\272\252ho\222\306\370\362!}6\364C\003\2717\206'!.\315\036mhMm\370\252\241\365\221g\275\326A\302\254\270X,\371\353\232:\222\321\253\025\217v%\222\023!\243r\272\364(\376\177\236\374\233\363\3048\330b\241xdTp\325\321\377\3428F\234\214\263\357\255f\324\306\226\257\022\"\000\354\003\024C\207\na\353\240&O\305\376\004ncy\350\f\276\357+Q|\201bBi\206\277\345u\251\273\310\367\242\303*\204d\n\271}\016\2345r8\034\201[\343:>\364*\242\266\025+HZ\263e\212\0247q\357\310X\267[\333(9_o}P\201\324>\266\364\000\217hh\352\225a\213q\260\031\334\022sg\360\e\206\234B=\246\2421\341e\364\270\321\224\347\0056L\267\227)\244\210\307\027\257<\343\257\000\303\264u{\235\326\352i\303^\332\200\n\236\243a\277\034J#~S\335'2\371\001q\3745$\356\027^\371\325\344\331\036\362\004\267\330\251<\212\237\257\345kr\371\302d\362r\376\344d\252C\31
1\374R6\017e\375\005\271yAV\363/\257\345\261(\340hW\020\222\a\027k)60\354\217\363\3501\263rt\0364\025\025|\265\031\355\276d\357\3159\367\225\025\223U\273n\027\324\321H\031\030\036\357\356\377\010\266\337\374\003\3375Q\335"))
+#include "ruby.h" /*
+ /sLaSh *
+ oBfUsCaTeD RuBy *
+ cOpYrIgHt 2005 *
+bY SiMoN StRaNdGaArD *
+ #{X=320;Y=200;Z=20} */
+
+#define GUN1 42:
+#define GUN2 43:
+#define bo do
+#define when(gun) /**/
+#define DATA "p 'Hello embedded world'"
+#define DIRTY(argc,argv)\
+argc,argv,char=eval(\
+"#{DATA.read}\n[3,2,1]"\
+);sun=O.new\
+if(0)
+
+int
+sun[]={12,9,16,9,2,1,7,1,3,9,27,4, 13,2,11,5,4,1,25,
+5,0,1,14,9,15,4,26,9,23,2,17,6,31, 6,10,8,22,9,21,1,
+24,8,20,8,18,9,29,5,9,5,1,1,28,8,8,1,30, 9,6,8, 5,1,
+19,9,36,19,43, 9,34,11,50,19,48,18,49,9, 35,8,42,18,
+51,8,44,11,32, 11,47,9,37,1,39,9,38,19, 45,8,40,12,
+41,9,46,12,33,1,57,1,85,5,88,28,83,4,87, 6,62,28,89,
+9,80,28,60,21,52,21,72,29,54,21,75,8,70,29,58,28,65,
+9,91,8,74,29,79,2,77,1,53,1,81,5, 69,2,64,21, 86,29,
+67,9,59,1,61,5,73,6,76,28,56,21,68,29,78,29,63,5,66,
+28,90,29, 71,4,55,9,84,28,82,29,101,5, 103,9, 98,35,
+97,1,94,35,93,1,100,35,92,31,99,5,96,39,95,5,102,35};
+
+void run(int gun=0) { // [gun]=[:GUN1,:GUN2]
+ printf("run() %i\n", gun);
+ switch(gun) {
+ case GUN1 when(2)
+ printf("when2\n");
+ break; // end
+ case GUN2 when(3)
+ printf("when3\n");
+ break; // end
+ }
+}
+
+int main(int argc, char** argv) {
+ printf("hello world. number of arguments=%i\n", argc);
+ int fun=5;
+ bo {
+ fun -= 1; //.id - gun = fun
+ run(fun);
+ } while(fun>0);
+ ruby_init();
+ rb_eval_string(DATA);
+ return 0;
+}
+
+#if 0 // nobody reads un-defined code
+def goto*s;$s=[];Y.times{s=[];X.times{s<<[0]*3};$s<< s}end;A=0.5
+include Math;def u g,h,i,j,k,l;f,*m=((j-h).abs>(k-i).abs)?[proc{
+|n,o| g[o] [n ]=l },[h ,i ],[j,k]]:[proc{
+|p,q| g[ p][ q] =l} ,[ i,h ], [k,j]];b,a=m.sort
+c,d=a [1 ]-b [1 ],a [0 ]-b [0 ];d.times{|e|f.
+call( e+b[ 0] ,c* e/d+b [1])};end;V=0;def bo&u
+$u||= V; ;$u += 1+V ;; return u.call if$u>1;q=128.0
+;x=(V .. 255 ). map {| y|f1,z =sin(y.to_f*PI/q),
+sin(( y. to_f + 200 )*PI/( q));[(f1*30.0+110.0).
+to_i,((f1+z)*10.0+40.0).to_i,(z*20.0+120.0).to_i]};Y.times{|i|X.
+times{|j|i1=((i*0.3+150)*(j*1.1+50)/50.0).to_i;i2=((i*0.8+510)*(
+j*0.9+1060)/51.0).to_i;$s[i][j]=x[(i1*i2)%255].clone}};$a=(0..25).
+inject([]){|a,i|a<<(V..3).inject([]){|r,j|r<<$c[i*4+j]}};u.call;end
+I=LocalJumpError;def run*a,&b;return if a.size==V;if a[V]==666;$b=b
+elsif$b;$b.call;end;end;def main s,&u;$m=V;u.call rescue I;end
+def rb_eval_string(*a);end # you promised not to look here
+def ruby_init;q=2.0;l=((X**q)*A+(Y**q)*A)**A;V.upto(Y-4){|s|V.
+upto(X-4){|q|d=((q-X/A)**q+(s-Y/A)**q)**A;e=(cos(d*PI/(l/q))/q
++A)*3.0+1.0;v=2;f=v/e;a,p,b=$s[s],$s[s+1],$s[s+v];r=a[q][V]*e+
+p[q][V]+a[q+1][V]+b[q][V]+a[q+v][V]+b[q+v/v][V]+p[q+v][V]+b[q+
+v][V]*f;g=[a[q][V],b[q][V],a[q+v][V],b[q+v][V]];h=(g.max-g.min
+)*f;$s[s][q][V]=[[(r/(e+f+6.0)+A+(h*0.4)).to_i,255].min,V].max
+}};File.open("res.ppm","w+"){|f|f.write(# secret.greetings :-)
+"P3\n# res.ppm\n#{X} #{Y}\n255\n"+$s.map{|a|a.map{|b|b.join' '
+}.join(' ')+"\n"}.join)};end;def switch i,&b;b.call;return unless
+defined?($m);b=(X*0.01).to_i;d=1.0/40.0;e=0.09;c=(Y*0.01).to_i
+a=$a.map{|(f,g,h,j)|[f*d,g*e,h*d,j*e]};a.each{|(k,l,m,n)|u($s,(k*X
+).to_i+b+i,(l*Y).to_i+c+i,(m*X).to_i+b+i,(n*Y).to_i+c+i,[Z]*3)}
+a.each{|(o,q,r,s)|u($s,(o*(X-Z)).to_i+i,(q*(Y-Z)).to_i+i,(r*(X-
+Z)).to_i+i,(s*(Y-Z)).to_i+i,[(1<<8)-1]*3)};end;Q=Object;class
+Regexp;def []=(v,is);is.each{|s|Q.send(:remove_const,s)if Q.
+const_defined? s;Q.const_set(s,v)};end;end;def int*ptr;666
+end;class O;def []=(a,b=nil);$c=a;end;end;alias:void:goto
+#endif // pretend as if you havn't seen anything
+=end
+
|