September 9, 2013 02:25
diff --git a/gistfile1.rs b/gistfile1.rs
 /*
 running 7 tests
 test bench_decode_utf8_replace_none_condition ... bench: 1554 ns/iter (+/- 70)
 test bench_decode_utf8_replace_none_condition_no_amortized ... bench: 1974 ns/iter (+/- 267)
 test bench_decode_utf8_replace_none_condition_nocatch ... bench: 1536 ns/iter (+/- 51)
 test bench_decode_utf8_replace_none_default ... bench: 1460 ns/iter (+/- 50)
 test bench_decode_utf8_replace_once_condition ... bench: 3423 ns/iter (+/- 449)
 test bench_decode_utf8_replace_once_condition_no_amortized ... bench: 4041 ns/iter (+/- 160)
 test bench_decode_utf8_replace_once_default ... bench: 1586 ns/iter (+/- 40)

 test result: ok. 0 passed; 0 failed; 0 ignored; 7 measured
 */

 extern mod extra;

 /// Iterator adaptor with a one-item "put back" slot: an item stored in the
 /// slot is yielded again before the wrapped iterator is consulted.
 #[deriving(Clone)]
 struct PutBack<A, T> {
    /// Item handed back through `put_back`, if any.
    top: Option<A>,
    /// The wrapped iterator.
    iter: T
 }

 // Construction and put-back support for `PutBack`.
 impl<A, T> PutBack<A, T> {
    /// Wraps `it`; the put-back slot starts out empty.
    #[inline]
    pub fn new(it: T) -> PutBack<A, T> {
        PutBack { iter: it, top: None }
    }

    /// Stores `x` in the put-back slot. Whatever the slot held before is
    /// overwritten.
    #[inline]
    pub fn put_back(&mut self, x: A) {
        let slot = Some(x);
        self.top = slot;
    }
 }

 // `PutBack` yields the put-back item (when one is held) before it resumes
 // the wrapped iterator.
 impl<A, T: Iterator<A>> Iterator<A> for PutBack<A, T> {
    /// Empties the put-back slot and returns its item if it held one;
    /// otherwise advances the wrapped iterator.
    #[inline]
    fn next(&mut self) -> Option<A> {
        let held = self.top.take();
        if held.is_some() {
            held
        } else {
            self.iter.next()
        }
    }

    /// Size hint of the wrapped iterator, with both bounds raised by one
    /// while an item is held in the put-back slot.
    #[inline]
    fn size_hint(&self) -> (uint, Option<uint>) {
        let (lower, upper) = self.iter.size_hint();
        if self.top.is_none() {
            return (lower, upper);
        }
        (lower + 1, upper.map(|n| *n + 1))
    }
 }



 /// Kinds of decoding error that `utf8_decode` reports to a `DecoderHandler`.
 pub enum DecoderError {
    /// End of input with unfinished character
    IncompleteInput,
    /// Invalid byte in codepoint encoding
    InvalidSequence,
 }

 // Declares the `decode_error` condition. It is raised with the error kind
 // and an owned copy of the offending bytes; whoever handles it answers with
 // the replacement string.
 // NOTE(review): the `super::` path presumably reflects the macro expanding
 // inside a generated `decode_error` module -- confirm against std::condition.
 condition! {
    pub decode_error: (super::DecoderError, ~[u8]) -> ~str;
 }

 /// Strategy for handling a decoding error.
 pub trait DecoderHandler {
    /// Returns the text to emit in place of the malformed bytes `sequence`;
    /// `msg` identifies the kind of error.
    ///
    /// The DecoderHandler may fail, raise a condition,
    /// or return a replacement string.
    fn error_str(&self, msg: DecoderError, sequence: &[u8]) -> ~str;
 }

 /// Handler that raises the condition `decode_error` when an error is
 /// encountered.
 /// Decoding continues if the condition is resolved; the string the condition
 /// handler returns is used as the replacement text.
 #[deriving(Clone)]
 pub struct Strict;

 /// Handler that substitutes the decoder's default Replacement Character
 /// (U+FFFD) for every error.
 #[deriving(Clone)]
 pub struct ReplaceDefault;

 // `ReplaceDefault` ignores the error details entirely.
 impl DecoderHandler for ReplaceDefault {
    /// Returns a freshly allocated "\uFFFD" whatever the arguments are.
    #[inline(always)]
    fn error_str(&self, _: DecoderError, _: &[u8]) -> ~str {
        ~"\uFFFD"
    }
 }

 // `Strict` defers every decoding error to the `decode_error` condition.
 impl DecoderHandler for Strict {
    /// Raises `decode_error` with `msg` and an owned copy of `buf`, and
    /// returns whatever string the condition resolves to.
    fn error_str(&self, msg: DecoderError, buf: &[u8]) -> ~str {
        let payload = (msg, buf.to_owned());
        decode_error::cond.raise(payload)
    }
 }

 // State of the decoder inside a multi-byte character: what must follow the
 // bytes consumed so far. `utf8_decode` picks the initial state from the lead
 // byte and steps towards `Ct` as continuation bytes arrive.
 enum UTF8Expect {
    Ct, // Expecting 80 .. BF (one continuation byte)
    E0, // Expecting A0 .. BF then one continuation byte
    Et, // Expecting two continuation bytes
    ED, // Expecting 80 .. 9F then one continuation byte
    F0, // Expecting 90 .. BF then two continuation bytes
    Ft, // Expecting three continuation bytes
    F4, // Expecting 80 .. 8F then two continuation bytes
 }

 /// Decodes `data` as UTF-8 and returns the result as an owned string.
 ///
 /// Runs of well-formed input are copied to the output unchanged. For every
 /// malformed or truncated sequence, `err.error_str` is called with the error
 /// kind and the offending bytes, and the string it returns is appended to
 /// the output in their place.
 ///
 /// Lead bytes 80..C1 and F5..FF are rejected outright, and the second byte
 /// of E0, ED, F0 and F4 sequences is restricted as described on
 /// `UTF8Expect`. NOTE(review): this looks like the Unicode Standard's table
 /// of well-formed UTF-8 byte sequences -- confirm against the spec.
 fn utf8_decode<E: DecoderHandler>(data: &[u8], err: E) -> ~str {
    use std::str;

    // Output bytes, pre-sized to the input length. The vector can still grow
    // when a replacement string is longer than the bytes it stands in for.
    let mut res = std::vec::with_capacity(data.len());
    // (index, byte) pairs; PutBack lets the inner loop hand back a byte that
    // turned out not to belong to the character being decoded.
    let mut it = PutBack::new(data.iter().map(|&x| x).enumerate());
    // `start` and `valid` describe the pending run of well-formed input,
    // data[start .. start + valid], that has not been copied into `res` yet.
    let mut valid = 0u;
    let mut start = None;
    // What the inner loop expects next for the character being decoded.
    let mut expt;
    // Grows the pending run by `step` bytes; `i` is recorded as the start of
    // the run only when no run is currently open.
    let extend_valid = |i, step| {
        if start.is_none() { start = Some(i) }
        valid += step;
    };
    // Copies the pending run (if any) into `res` and resets the bookkeeping.
    let flush = || {
        match start {
            Some(st) => res.push_all(data.slice(st, st + valid)),
            _ => ()
        }
        start = None; valid = 0
    };
    // Asks the handler for replacement text and appends it to `res`. Callers
    // flush first so the output stays in input order.
    let replace = |msg, slc| {
        let repl = err.error_str(msg, slc);
        res.push_all(repl.as_bytes());
    };
    loop {
        // Index of the lead byte of the multi-byte character being decoded.
        let mut ch_start;
        match it.next() {
            None => break,
            Some((i, b)) => {
                match b {
                    // ASCII: extend the pending run and move on to the next
                    // byte (`loop` here continues the enclosing loop).
                    0x00 .. 0x7F => { extend_valid(i, 1); loop },
                    0xC2 .. 0xDF => expt = Ct,
                    0xE0         => expt = E0,
                    0xE1 .. 0xEC |
                    0xEE .. 0xEF => expt = Et,
                    0xED         => expt = ED,
                    0xF0         => expt = F0,
                    0xF1 .. 0xF3 => expt = Ft,
                    0xF4         => expt = F4,
                    // Anything else cannot start a character: report this
                    // single byte and continue with the next one.
                    _            => {
                        flush();
                        replace(InvalidSequence, &[b]);
                        loop
                    }
                }
                ch_start = i;
            }
        }
        // Consume the continuation bytes of the character begun at `ch_start`.
        loop {
            let (i, b) = match it.next() {
                None => {
                    // Error: incomplete stream
                    // Everything from the lead byte to the end of the input
                    // is reported, and decoding ends here.
                    flush();
                    replace(IncompleteInput, data.slice_from(ch_start));
                    // SAFETY (review): `res` holds only byte runs accepted by
                    // the checks above plus the bytes of handler-supplied
                    // `~str` values, so it is expected to be valid UTF-8.
                    return unsafe { str::raw::from_utf8_owned(res) }
                },
                Some(x) => x,
            };
            match (expt, b) {
                // Three-byte forms: second byte accepted, one more to go.
                (E0, 0xA0 .. 0xBF) |
                (Et, 0x80 .. 0xBF) |
                (ED, 0x80 .. 0x9F) => expt = Ct,
                // Four-byte forms: second byte accepted, two more to go.
                (F0, 0x90 .. 0xBF) |
                (Ft, 0x80 .. 0xBF) |
                (F4, 0x80 .. 0x8F) => expt = Et,
                // Final continuation byte: the whole character, from
                // `ch_start` through `i`, joins the pending run.
                (Ct, 0x80 .. 0xBF) => {
                    extend_valid(ch_start, i + 1 - ch_start);
                    break
                },
                _ => {
                    // Error: Invalid continuation byte
                    // Only the bytes before `b` are reported as invalid.
                    flush();
                    replace(InvalidSequence, data.slice(ch_start, i));
                    /* To be UTF-8 conformant we MUST not skip over valid start bytes here */
                    it.put_back((i, b));
                    break
                },
            }
        }
    }
    // Copy whatever well-formed run is still pending at the end of the input.
    flush();
    // SAFETY (review): same reasoning as at the early return above.
    unsafe { str::raw::from_utf8_owned(res) }
 }


 // U+FFFD as a `char`; not referenced anywhere else in the visible source.
 static ReplacementChar: char = '\ufffd';
 // U+FFFD as a string; the benchmark condition handlers return a copy of it.
 static ReplacementCharStr: &'static str = "\uFFFD";

 // Sink for benchmark results: takes ownership of its argument and does
 // nothing with it.
 // NOTE(review): `#[inline(never)]` presumably keeps the optimizer from
 // discarding the work that produced the argument -- confirm.
 #[inline(never)]
 fn used<T>(_: T) { }

 // Benchmark input for the `_replace_none_` benchmarks: 210 bytes that start
 // with the ASCII text "Linguistics and dictionaries:" followed by text with
 // multi-byte characters. Appears to be well-formed UTF-8 -- TODO confirm.
 static longer_text: [u8, ..210] = 
 [76, 105, 110, 103, 117, 105, 115, 116, 105, 99, 115, 32, 97, 110, 100, 32, 100, 105, 99, 116, 105, 111, 110, 97, 114, 105, 101, 115, 58, 10, 10, 32, 32, 195, 176, 105, 32, 196, 177, 110, 116, 201, 153, 203, 136, 110, 195, 166, 202, 131, 201, 153, 110, 201, 153, 108, 32, 102, 201, 153, 203, 136, 110, 201, 155, 116, 196, 177, 107, 32, 201, 153, 115, 111, 202, 138, 115, 105, 203, 136, 101, 196, 177, 202, 131, 110, 10, 32, 32, 89, 32, 91, 203, 136, 202, 143, 112, 115, 105, 108, 201, 148, 110, 93, 44, 32, 89, 101, 110, 32, 91, 106, 201, 155, 110, 93, 44, 32, 89, 111, 103, 97, 32, 91, 203, 136, 106, 111, 203, 144, 103, 201, 145, 93, 10, 10, 65, 80, 76, 58, 10, 10, 32, 32, 40, 40, 86, 226, 141, 179, 86, 41, 61, 226, 141, 179, 226, 141, 180, 86, 41, 47, 86, 226, 134, 144, 44, 86, 32, 32, 32, 32, 226, 140, 183, 226, 134, 144, 226, 141, 179, 226, 134, 146, 226, 141, 180, 226, 136, 134, 226, 136, 135, 226, 138, 131, 226, 128, 190, 226, 141, 142, 226, 141, 149, 226, 140, 136, 10, 10];

 // Benchmark input for the `_replace_once_` benchmarks. Appears to be
 // `longer_text` with a single byte (a 44, after "... 110, 93") replaced by
 // 255, which `utf8_decode` rejects as a lead byte -- TODO confirm that the
 // remaining bytes are identical.
 static longer_text_one_error: [u8, ..210] = 
 [76, 105, 110, 103, 117, 105, 115, 116, 105, 99, 115, 32, 97, 110, 100, 32, 100, 105, 99, 116, 105, 111, 110, 97, 114, 105, 101, 115, 58, 10, 10, 32, 32, 195, 176, 105, 32, 196, 177, 110, 116, 201, 153, 203, 136, 110, 195, 166, 202, 131, 201, 153, 110, 201, 153, 108, 32, 102, 201, 153, 203, 136, 110, 201, 155, 116, 196, 177, 107, 32, 201, 153, 115, 111, 202, 138, 115, 105, 203, 136, 101, 196, 177, 202, 131, 110, 10, 32, 32, 89, 32, 91, 203, 136, 202, 143, 112, 115, 105, 108, 201, 148, 110, 93, 44, 32, 89, 101, 110, 32, 91, 106, 201, 155, 110, 93, 255, 32, 89, 111, 103, 97, 32, 91, 203, 136, 106, 111, 203, 144, 103, 201, 145, 93, 10, 10, 65, 80, 76, 58, 10, 10, 32, 32, 40, 40, 86, 226, 141, 179, 86, 41, 61, 226, 141, 179, 226, 141, 180, 86, 41, 47, 86, 226, 134, 144, 44, 86, 32, 32, 32, 32, 226, 140, 183, 226, 134, 144, 226, 141, 179, 226, 134, 146, 226, 141, 180, 226, 136, 134, 226, 136, 135, 226, 138, 131, 226, 128, 190, 226, 141, 142, 226, 141, 149, 226, 140, 136, 10, 10];

 #[bench]
 fn bench_decode_utf8_replace_none_default(b: &mut extra::test::BenchHarness) {
    do b.iter {
        used(utf8_decode(longer_text, ReplaceDefault));
    }
 }

 #[bench]
 fn bench_decode_utf8_replace_none_condition(b: &mut extra::test::BenchHarness) {
    do decode_error::cond.trap(|_| ReplacementCharStr.to_owned()).inside {
        do b.iter {
            used(utf8_decode(longer_text, Strict));
        }
    }
 }

 #[bench]
 fn bench_decode_utf8_replace_none_condition_nocatch(b: &mut extra::test::BenchHarness) {
    do b.iter {
        used(utf8_decode(longer_text, Strict));
    }
 }

 // Set up the condition handler for each iteration
 #[bench]
 fn bench_decode_utf8_replace_none_condition_no_amortized(b: &mut extra::test::BenchHarness) {
    do b.iter {
        do decode_error::cond.trap(|_| ReplacementCharStr.to_owned()).inside {
            used(utf8_decode(longer_text, Strict));
        }
    }
 }

 #[bench]
 fn bench_decode_utf8_replace_once_default(b: &mut extra::test::BenchHarness) {
    do b.iter {
        used(utf8_decode(longer_text_one_error, ReplaceDefault));
    }
 }

 #[bench]
 fn bench_decode_utf8_replace_once_condition(b: &mut extra::test::BenchHarness) {
    do decode_error::cond.trap(|_| ReplacementCharStr.to_owned()).inside {
        do b.iter {
            used(utf8_decode(longer_text_one_error, Strict));
        }
    }
 }

 // Set up the condition handler for each iteration
 #[bench]
 fn bench_decode_utf8_replace_once_condition_no_amortized(b: &mut extra::test::BenchHarness) {
    do b.iter {
        do decode_error::cond.trap(|_| ReplacementCharStr.to_owned()).inside {
            used(utf8_decode(longer_text_one_error, Strict));
        }
    }
 }
	/*
	running 7 tests
	test bench_decode_utf8_replace_none_condition ... bench: 1554 ns/iter (+/- 70)
	test bench_decode_utf8_replace_none_condition_no_amortized ... bench: 1974 ns/iter (+/- 267)
	test bench_decode_utf8_replace_none_condition_nocatch ... bench: 1536 ns/iter (+/- 51)
	test bench_decode_utf8_replace_none_default ... bench: 1460 ns/iter (+/- 50)
	test bench_decode_utf8_replace_once_condition ... bench: 3423 ns/iter (+/- 449)
	test bench_decode_utf8_replace_once_condition_no_amortized ... bench: 4041 ns/iter (+/- 160)
	test bench_decode_utf8_replace_once_default ... bench: 1586 ns/iter (+/- 40)

	test result: ok. 0 passed; 0 failed; 0 ignored; 7 measured
	*/

	extern mod extra;

	/// Iterator adaptor with a one-item "put back" slot: an item stored in the
	/// slot is yielded again before the wrapped iterator is consulted.
	#[deriving(Clone)]
	struct PutBack<A, T> {
	/// Item handed back through `put_back`, if any.
	top: Option<A>,
	/// The wrapped iterator.
	iter: T
	}

	// Construction and put-back support for `PutBack`.
	impl<A, T> PutBack<A, T> {
	    /// Wraps `it`; the put-back slot starts out empty.
	    #[inline]
	    pub fn new(it: T) -> PutBack<A, T> {
	        PutBack { iter: it, top: None }
	    }

	    /// Stores `x` in the put-back slot. Whatever the slot held before is
	    /// overwritten.
	    #[inline]
	    pub fn put_back(&mut self, x: A) {
	        let slot = Some(x);
	        self.top = slot;
	    }
	}

	// `PutBack` yields the put-back item (when one is held) before it resumes
	// the wrapped iterator.
	impl<A, T: Iterator<A>> Iterator<A> for PutBack<A, T> {
	    /// Empties the put-back slot and returns its item if it held one;
	    /// otherwise advances the wrapped iterator.
	    #[inline]
	    fn next(&mut self) -> Option<A> {
	        match self.top.take() {
	            None => self.iter.next(),
	            top => top,
	        }
	    }

	    /// Size hint of the wrapped iterator, with both bounds raised by one
	    /// while an item is held in the put-back slot.
	    #[inline]
	    fn size_hint(&self) -> (uint, Option<uint>) {
	        let (lo, hi) = self.iter.size_hint();
	        if self.top.is_some() {
	            // The closure delimiters were Markdown-escaped (`\|a\|`) in
	            // this copy; restored to plain `|a|`.
	            (lo + 1, hi.map(|a| *a + 1))
	        } else { (lo, hi) }
	    }
	}



	/// Kinds of decoding error that `utf8_decode` reports to a `DecoderHandler`.
	pub enum DecoderError {
	/// End of input with unfinished character
	IncompleteInput,
	/// Invalid byte in codepoint encoding
	InvalidSequence,
	}

	// Declares the `decode_error` condition. It is raised with the error kind
	// and an owned copy of the offending bytes; whoever handles it answers with
	// the replacement string.
	// NOTE(review): the `super::` path presumably reflects the macro expanding
	// inside a generated `decode_error` module -- confirm against std::condition.
	condition! {
	pub decode_error: (super::DecoderError, ~[u8]) -> ~str;
	}

	/// Strategy for handling a decoding error.
	pub trait DecoderHandler {
	/// Returns the text to emit in place of the malformed bytes `sequence`;
	/// `msg` identifies the kind of error.
	///
	/// The DecoderHandler may fail, raise a condition,
	/// or return a replacement string.
	fn error_str(&self, msg: DecoderError, sequence: &[u8]) -> ~str;
	}

	/// Handler that raises the condition `decode_error` when an error is
	/// encountered.
	/// Decoding continues if the condition is resolved; the string the condition
	/// handler returns is used as the replacement text.
	#[deriving(Clone)]
	pub struct Strict;

	/// Handler that substitutes the decoder's default Replacement Character
	/// (U+FFFD) for every error.
	#[deriving(Clone)]
	pub struct ReplaceDefault;

	// `ReplaceDefault` ignores the error details entirely.
	impl DecoderHandler for ReplaceDefault {
	/// Returns a freshly allocated "\uFFFD" whatever the arguments are.
	#[inline(always)]
	fn error_str(&self, _: DecoderError, _: &[u8]) -> ~str {
	~"\uFFFD"
	}
	}

	// `Strict` defers every decoding error to the `decode_error` condition.
	impl DecoderHandler for Strict {
	    /// Raises `decode_error` with `msg` and an owned copy of `buf`, and
	    /// returns whatever string the condition resolves to.
	    fn error_str(&self, msg: DecoderError, buf: &[u8]) -> ~str {
	        let payload = (msg, buf.to_owned());
	        decode_error::cond.raise(payload)
	    }
	}

	// State of the decoder inside a multi-byte character: what must follow the
	// bytes consumed so far. `utf8_decode` picks the initial state from the lead
	// byte and steps towards `Ct` as continuation bytes arrive.
	enum UTF8Expect {
	Ct, // Expecting 80 .. BF (one continuation byte)
	E0, // Expecting A0 .. BF then one continuation byte
	Et, // Expecting two continuation bytes
	ED, // Expecting 80 .. 9F then one continuation byte
	F0, // Expecting 90 .. BF then two continuation bytes
	Ft, // Expecting three continuation bytes
	F4, // Expecting 80 .. 8F then two continuation bytes
	}

	fn utf8_decode<E: DecoderHandler>(data: &[u8], err: E) -> ~str {
	use std::str;

	let mut res = std::vec::with_capacity(data.len());
	let mut it = PutBack::new(data.iter().map(\|&x\| x).enumerate());
	let mut valid = 0u;
	let mut start = None;
	let mut expt;
	let extend_valid = \|i, step\| {
	if start.is_none() { start = Some(i) }
	valid += step;
	};
	let flush = \|\| {
	match start {
	Some(st) => res.push_all(data.slice(st, st + valid)),
	_ => ()
	}
	start = None; valid = 0
	};
	let replace = \|msg, slc\| {
	let repl = err.error_str(msg, slc);
	res.push_all(repl.as_bytes());
	};
	loop {
	let mut ch_start;
	match it.next() {
	None => break,
	Some((i, b)) => {
	match b {
	0x00 .. 0x7F => { extend_valid(i, 1); loop },
	0xC2 .. 0xDF => expt = Ct,
	0xE0 => expt = E0,
	0xE1 .. 0xEC \|
	0xEE .. 0xEF => expt = Et,
	0xED => expt = ED,
	0xF0 => expt = F0,
	0xF1 .. 0xF3 => expt = Ft,
	0xF4 => expt = F4,
	_ => {
	flush();
	replace(InvalidSequence, &[b]);
	loop
	}
	}
	ch_start = i;
	}
	}
	loop {
	let (i, b) = match it.next() {
	None => {
	// Error: incomplete stream
	flush();
	replace(IncompleteInput, data.slice_from(ch_start));
	return unsafe { str::raw::from_utf8_owned(res) }
	},
	Some(x) => x,
	};
	match (expt, b) {
	(E0, 0xA0 .. 0xBF) \|
	(Et, 0x80 .. 0xBF) \|
	(ED, 0x80 .. 0x9F) => expt = Ct,
	(F0, 0x90 .. 0xBF) \|
	(Ft, 0x80 .. 0xBF) \|
	(F4, 0x80 .. 0x8F) => expt = Et,
	(Ct, 0x80 .. 0xBF) => {
	extend_valid(ch_start, i + 1 - ch_start);
	break
	},
	_ => {
	// Error: Invalid continuation byte
	flush();
	replace(InvalidSequence, data.slice(ch_start, i));
	/* To be UTF-8 conformant we MUST not skip over valid start bytes here */
	it.put_back((i, b));
	break
	},
	}
	}
	}
	flush();
	unsafe { str::raw::from_utf8_owned(res) }
	}


	// U+FFFD as a `char`; not referenced anywhere else in the visible source.
	static ReplacementChar: char = '\ufffd';
	// U+FFFD as a string; the benchmark condition handlers return a copy of it.
	static ReplacementCharStr: &'static str = "\uFFFD";

	// Sink for benchmark results: takes ownership of its argument and does
	// nothing with it.
	// NOTE(review): `#[inline(never)]` presumably keeps the optimizer from
	// discarding the work that produced the argument -- confirm.
	#[inline(never)]
	fn used<T>(_: T) { }

	// Benchmark input for the `_replace_none_` benchmarks: 210 bytes that start
	// with the ASCII text "Linguistics and dictionaries:" followed by text with
	// multi-byte characters. Appears to be well-formed UTF-8 -- TODO confirm.
	static longer_text: [u8, ..210] =
	[76, 105, 110, 103, 117, 105, 115, 116, 105, 99, 115, 32, 97, 110, 100, 32, 100, 105, 99, 116, 105, 111, 110, 97, 114, 105, 101, 115, 58, 10, 10, 32, 32, 195, 176, 105, 32, 196, 177, 110, 116, 201, 153, 203, 136, 110, 195, 166, 202, 131, 201, 153, 110, 201, 153, 108, 32, 102, 201, 153, 203, 136, 110, 201, 155, 116, 196, 177, 107, 32, 201, 153, 115, 111, 202, 138, 115, 105, 203, 136, 101, 196, 177, 202, 131, 110, 10, 32, 32, 89, 32, 91, 203, 136, 202, 143, 112, 115, 105, 108, 201, 148, 110, 93, 44, 32, 89, 101, 110, 32, 91, 106, 201, 155, 110, 93, 44, 32, 89, 111, 103, 97, 32, 91, 203, 136, 106, 111, 203, 144, 103, 201, 145, 93, 10, 10, 65, 80, 76, 58, 10, 10, 32, 32, 40, 40, 86, 226, 141, 179, 86, 41, 61, 226, 141, 179, 226, 141, 180, 86, 41, 47, 86, 226, 134, 144, 44, 86, 32, 32, 32, 32, 226, 140, 183, 226, 134, 144, 226, 141, 179, 226, 134, 146, 226, 141, 180, 226, 136, 134, 226, 136, 135, 226, 138, 131, 226, 128, 190, 226, 141, 142, 226, 141, 149, 226, 140, 136, 10, 10];

	// Benchmark input for the `_replace_once_` benchmarks. Appears to be
	// `longer_text` with a single byte (a 44, after "... 110, 93") replaced by
	// 255, which `utf8_decode` rejects as a lead byte -- TODO confirm that the
	// remaining bytes are identical.
	static longer_text_one_error: [u8, ..210] =
	[76, 105, 110, 103, 117, 105, 115, 116, 105, 99, 115, 32, 97, 110, 100, 32, 100, 105, 99, 116, 105, 111, 110, 97, 114, 105, 101, 115, 58, 10, 10, 32, 32, 195, 176, 105, 32, 196, 177, 110, 116, 201, 153, 203, 136, 110, 195, 166, 202, 131, 201, 153, 110, 201, 153, 108, 32, 102, 201, 153, 203, 136, 110, 201, 155, 116, 196, 177, 107, 32, 201, 153, 115, 111, 202, 138, 115, 105, 203, 136, 101, 196, 177, 202, 131, 110, 10, 32, 32, 89, 32, 91, 203, 136, 202, 143, 112, 115, 105, 108, 201, 148, 110, 93, 44, 32, 89, 101, 110, 32, 91, 106, 201, 155, 110, 93, 255, 32, 89, 111, 103, 97, 32, 91, 203, 136, 106, 111, 203, 144, 103, 201, 145, 93, 10, 10, 65, 80, 76, 58, 10, 10, 32, 32, 40, 40, 86, 226, 141, 179, 86, 41, 61, 226, 141, 179, 226, 141, 180, 86, 41, 47, 86, 226, 134, 144, 44, 86, 32, 32, 32, 32, 226, 140, 183, 226, 134, 144, 226, 141, 179, 226, 134, 146, 226, 141, 180, 226, 136, 134, 226, 136, 135, 226, 138, 131, 226, 128, 190, 226, 141, 142, 226, 141, 149, 226, 140, 136, 10, 10];

	#[bench]
	fn bench_decode_utf8_replace_none_default(b: &mut extra::test::BenchHarness) {
	do b.iter {
	used(utf8_decode(longer_text, ReplaceDefault));
	}
	}

	// Decodes `longer_text` with the `Strict` handler; the `decode_error` trap
	// is installed once, around the whole measurement loop.
	// The trap closure was Markdown-escaped (`\|_\|`) in this copy; restored.
	#[bench]
	fn bench_decode_utf8_replace_none_condition(b: &mut extra::test::BenchHarness) {
	    do decode_error::cond.trap(|_| ReplacementCharStr.to_owned()).inside {
	        do b.iter {
	            used(utf8_decode(longer_text, Strict));
	        }
	    }
	}

	#[bench]
	fn bench_decode_utf8_replace_none_condition_nocatch(b: &mut extra::test::BenchHarness) {
	do b.iter {
	used(utf8_decode(longer_text, Strict));
	}
	}

	// Decodes `longer_text` with the `Strict` handler; the `decode_error` trap
	// is installed anew inside every iteration, so its setup cost is measured.
	// The trap closure was Markdown-escaped (`\|_\|`) in this copy; restored.
	#[bench]
	fn bench_decode_utf8_replace_none_condition_no_amortized(b: &mut extra::test::BenchHarness) {
	    do b.iter {
	        do decode_error::cond.trap(|_| ReplacementCharStr.to_owned()).inside {
	            used(utf8_decode(longer_text, Strict));
	        }
	    }
	}

	#[bench]
	fn bench_decode_utf8_replace_once_default(b: &mut extra::test::BenchHarness) {
	do b.iter {
	used(utf8_decode(longer_text_one_error, ReplaceDefault));
	}
	}

	// Decodes `longer_text_one_error` with the `Strict` handler; the
	// `decode_error` trap is installed once, around the whole measurement loop.
	// The trap closure was Markdown-escaped (`\|_\|`) in this copy; restored.
	#[bench]
	fn bench_decode_utf8_replace_once_condition(b: &mut extra::test::BenchHarness) {
	    do decode_error::cond.trap(|_| ReplacementCharStr.to_owned()).inside {
	        do b.iter {
	            used(utf8_decode(longer_text_one_error, Strict));
	        }
	    }
	}

	// Decodes `longer_text_one_error` with the `Strict` handler; the
	// `decode_error` trap is installed anew inside every iteration, so its
	// setup cost is measured.
	// The trap closure was Markdown-escaped (`\|_\|`) in this copy; restored.
	#[bench]
	fn bench_decode_utf8_replace_once_condition_no_amortized(b: &mut extra::test::BenchHarness) {
	    do b.iter {
	        do decode_error::cond.trap(|_| ReplacementCharStr.to_owned()).inside {
	            used(utf8_decode(longer_text_one_error, Strict));
	        }
	    }
	}