1 /**
2     Defines a string based dictionary list with conserved insertion order.
3 
4     Copyright: © 2012-2014 RejectedSoftware e.K.
5     License: Subject to the terms of the MIT license, as written in the included LICENSE file.
6     Authors: Sönke Ludwig
7 */
8 module memutils.dictionarylist;
9 
10 import memutils.helpers;
11 import memutils.allocators;
12 import memutils.refcounted;
13 import memutils.utils;
14 import memutils.vector;
15 import std.conv : to;
16 import std.exception : enforce;
17 
18 alias DictionaryListRef(KEY, VALUE, ALLOC = ThreadMem, bool case_sensitive = true, size_t NUM_STATIC_FIELDS = 8) = RefCounted!(DictionaryList!(KEY, VALUE, ALLOC, case_sensitive, NUM_STATIC_FIELDS), ALLOC);
19 
20 /**
21  * 
22     Behaves similar to $(D VALUE[string]) but the insertion order is not changed
23     and multiple values per key are supported.
24     
25     Note that despite case not being relevant for matching keys, iterating
26     over the list will yield the original case of the key that was put in.
27 
28     Insertion and lookup has O(n) complexity.
29 */
30 struct DictionaryList(KEY, VALUE, ALLOC = ThreadMem, bool case_sensitive = true, size_t NUM_STATIC_FIELDS = 8) {
31 	@disable this(this);
32 
33 	import std.typecons : Tuple;
34 	
35 	private {
36 		static struct Field { uint keyCheckSum; KEY key; VALUE value; }
37 		Field[NUM_STATIC_FIELDS] m_fields;
38 		size_t m_fieldCount;
39 		Field[] m_extendedFields;
40 		size_t m_extendedFieldCount;
41 	}
42 
43 	~this() {
44 		if (m_extendedFields) {
45 			auto sz = m_extendedFields.length;
46 			freeArray!(Field, ALLOC)(m_extendedFields.ptr[0 .. m_extendedFieldCount], sz);
47 		}
48 	}
49 	
50 	alias KeyType = KEY;
51 	alias ValueType = VALUE;
52 	
53 	struct FieldTuple { KeyType key; ValueType value; }
54 	
55 	/** The number of fields present in the map.
56     */
57 	@property size_t length() const { return m_fieldCount + m_extendedFields.length; }
58 	
59 	/** Removes the first field that matches the given key.
60     */
61 	void remove(KeyType key)
62 	{
63 		auto keysum = computeCheckSumI(key);
64 		auto idx = getIndex(m_fields[0 .. m_fieldCount], key, keysum);
65 		if( idx >= 0 ){
66 			auto slice = m_fields[0 .. m_fieldCount];
67 			removeFromArrayIdx(slice, idx);
68 			m_fieldCount--;
69 		} else {
70 			idx = getIndex(m_extendedFields, key, keysum);
71 			enforce(idx >= 0);
72 			removeFromArrayIdx(m_extendedFields, idx);
73 		}
74 	}
75 	
76 	/** Removes all fields that matches the given key.
77     */
78 	void removeAll(KeyType key)
79 	{
80 		auto keysum = computeCheckSumI(key);
81 		for (size_t i = 0; i < m_fieldCount;) {
82 			if (m_fields[i].keyCheckSum == keysum && matches(m_fields[i].key, key)) {
83 				auto slice = m_fields[0 .. m_fieldCount];
84 				removeFromArrayIdx(slice, i);
85 				m_fieldCount--;
86 			} else i++;
87 		}
88 		
89 		for (size_t i = 0; i < m_extendedFields.length;) {
90 			if (m_extendedFields[i].keyCheckSum == keysum && matches(m_extendedFields[i].key, key))
91 				removeFromArrayIdx(m_extendedFields, i);
92 			else i++;
93 		}
94 	}
95 	
96 	/** Adds a new field to the map.
97 
98         The new field will be added regardless of any existing fields that
99         have the same key, possibly resulting in duplicates. Use opIndexAssign
100         if you want to avoid duplicates.
101     */
102 	void insert()(auto const ref KeyType key, ValueType value)
103 	{
104 		auto keysum = computeCheckSumI(key);
105 		if (m_fieldCount < m_fields.length) {
106 			logTrace("Appending: ", value);
107 			m_fields[m_fieldCount++] = Field(keysum, *cast(KeyType*) &key, value);
108 			logTrace("Now have: ", m_fields, " with ", m_fieldCount);
109 		}
110 		else {
111 			grow(1);
112 			m_extendedFields[$-1] = Field(keysum, *cast(KeyType*) &key, value);
113 		}
114 	}
115 	
116 	/** Returns the first field that matches the given key.
117 
118         If no field is found, def_val is returned.
119     */
120 	ValueType get(KeyType key, lazy ValueType def_val = ValueType.init) {
121 		if (auto pv = key in this) return *pv;
122 		return def_val;
123 	}
124 	
125 	const(ValueType) get(in KeyType key, lazy const(ValueType) def_val = const(ValueType).init) const
126 	{
127 		if (auto pv = key in this) return *pv;
128 		return def_val;
129 	}
130 	
131 	/** Returns all values matching the given key.
132 
133         Note that the version returning an array will allocate using the same allocator for each call.
134     */
135 	Vector!(ValueType, ALLOC) getValuesAt()(auto const ref KeyType key)
136 	const {
137 		import std.array;
138 		auto ret = Vector!(ValueType, ALLOC)(0);
139 		this.opApply( (k, const ref v) {
140 				// static if (is(ValueType == string)) logTrace("Checking field: ", v);
141 				//logTrace("Looping ", k, " => ", v);
142 				if (matches(key, k)) {
143 					//logTrace("Appending: ", v);
144 					ret ~= v;
145 				}
146 				return 0;
147 			});
148 		//logTrace("Finished getValuesAt with: ", ret[]);
149 		return ret.move();
150 	}
151 	
152 	/// ditto
153 	void getValuesAt(in KeyType key, scope void delegate(const(ValueType)) del)
154 	const {
155 		uint keysum = computeCheckSumI(key);
156 		foreach (ref f; m_fields[0 .. m_fieldCount]) {
157 			if (f == Field.init) continue;
158 			if (f.keyCheckSum != keysum) continue;
159 			if (matches(f.key, key)) del(f.value);
160 		}
161 		foreach (ref f; m_extendedFields) {
162 			if (f.keyCheckSum != keysum) continue;
163 			if (matches(f.key, key)) del(f.value);
164 		}
165 	}
166 	/** Returns the first value matching the given key.
167     */
168 	inout(ValueType) opIndex(KeyType key)
169 	inout {
170 		auto pitm = key in this;
171 		enforce(pitm !is null, "Accessing non-existent key '"~ key.to!string ~"'.");
172 		return *pitm;
173 	}
174 	
175 	/** Adds or replaces the given field with a new value.
176     */
177 	ValueType opIndexAssign(ValueType val, KeyType key)
178 	{
179 		auto pitm = key in this;
180 		if( pitm ) *pitm = val;
181 		else if( m_fieldCount < m_fields.length ) m_fields[m_fieldCount++] = Field(computeCheckSumI(key), key, val);
182 		else {
183 			grow(1);
184 			m_extendedFields[$-1] = Field(computeCheckSumI(key), key, val);
185 		}
186 		return val;
187 	}
188 	
189 	/** Returns a pointer to the first field that matches the given key.
190     */
191 	inout(ValueType)* opBinaryRight(string op)(in KeyType key) inout if(op == "in") {
192 		return find(key);
193 	}
194 	
195 	inout(ValueType)* find(in KeyType key) inout 
196 	{
197 		uint keysum = computeCheckSumI(key);
198 		auto idx = getIndex(m_fields[0 .. m_fieldCount], key, keysum);
199 		if( idx >= 0 ) return &m_fields[idx].value;
200 		idx = getIndex(m_extendedFields, key, keysum);
201 		if( idx >= 0 ) return &m_extendedFields[idx].value;
202 		return null;
203 	}
204 	
205 	/// ditto
206 	bool opBinaryRight(string op)(KeyType key) inout if(op == "!in") {
207 		return !(key in this);
208 	}
209 	
210 	/** Iterates over all fields, including duplicates.
211     */
212 	int opApply(int delegate(KeyType key, ref ValueType val) del)
213 	{
214 		foreach (ref kv; m_fields[0 .. m_fieldCount]) {
215 			if (kv == Field.init) return 0;
216 			if (auto ret = del(kv.key, kv.value))
217 				return ret;
218 		}
219 		foreach (ref kv; m_extendedFields) {
220 			if (auto ret = del(kv.key, kv.value))
221 				return ret;
222 		}
223 		return 0;
224 	}
225 	
226 	int opApply(int delegate(const ref KeyType key, const ref ValueType val) del) const
227 	{
228 		foreach (ref kv; m_fields[0 .. m_fieldCount]) {
229 			if (kv == Field.init) return 0;
230 			if (auto ret = del(kv.key, kv.value))
231 				return ret;
232 		}
233 		foreach (ref kv; m_extendedFields) {
234 			if (auto ret = del(kv.key, kv.value))
235 				return ret;
236 		}
237 		return 0;
238 	}
239 	
240 	/// ditto
241 	int opApply(int delegate(ref ValueType val) del)
242 	{
243 		return this.opApply((KeyType key, ref ValueType val) { return del(val); });
244 	}
245 	
246 	/// ditto
247 	int opApply(int delegate(KeyType key, ref const(ValueType) val) del) const
248 	{
249 		return (cast() this).opApply(cast(int delegate(KeyType, ref ValueType)) del);
250 	}
251 	
252 	/// ditto
253 	int opApply(int delegate(ref const(ValueType) val) del) const
254 	{
255 		return (cast() this).opApply(cast(int delegate(ref ValueType)) del);
256 	}
257 
258 	bool opEquals(const ref DictionaryList!(KEY, VALUE, ALLOC) other) const
259 	{
260 		foreach (const ref KeyType key, const ref ValueType val; this)
261 		{
262 			bool found;
263 			other.getValuesAt(key, (const ValueType oval) {
264 					if (oval == val) {
265 						found = true;
266 						return;
267 					}
268 				});
269 			if (!found)
270 				return false;
271 		}
272 		
273 		return true;
274 	}
275 
276 	private void grow(size_t n) {
277 		if (m_extendedFields.length + n < m_extendedFieldCount) {
278 			m_extendedFields = m_extendedFields.ptr[0 .. m_extendedFields.length + n];
279 			return;
280 		}
281 		if (m_extendedFields.length > 0)
282 		{
283 			size_t oldsz = m_extendedFields.length;
284 			m_extendedFields = m_extendedFields.ptr[0 .. m_extendedFieldCount];
285 			m_extendedFieldCount = (m_extendedFieldCount + n)*3/2;
286 			logTrace("Extended field count: ", m_extendedFieldCount);
287 			m_extendedFields = reallocArray!(Field, ALLOC)(m_extendedFields, m_extendedFieldCount)[0 .. oldsz + n];
288 			memset(m_extendedFields.ptr + oldsz, 0, (m_extendedFieldCount-oldsz)*Field.sizeof);
289 		}
290 		else {
291 			m_extendedFieldCount = 16;
292 			m_extendedFields = allocArray!(Field, ALLOC)(16).ptr[0 .. n];
293 			memset(m_extendedFields.ptr, 0, m_extendedFieldCount*Field.sizeof);
294 		}
295 	}
296 
297 	private ptrdiff_t getIndex(in Field[] map, in KeyType key, uint keysum)
298 	const {
299 		foreach (i, ref const(Field) entry; map) {
300 			if (entry.keyCheckSum != keysum) continue;
301 			if (matches(entry.key, key)) return i;
302 		}
303 		return -1;
304 	}
305 	
306 	private static bool matches(in KeyType a, in KeyType b)
307 	{
308 		static if (case_sensitive) return a == b;
309 		else static if (is (KeyType == string)) return icmp2(a, b) == 0;
310 		else return a == b;
311 	}
312 	
313 	// very simple check sum function with a good chance to match
314 	// strings with different case equal
315 	private static uint computeCheckSumI(string s)
316 	@trusted {
317 		uint csum = 0;
318 		immutable(char)* pc = s.ptr, pe = s.ptr + s.length;
319 		for (; pc != pe; pc++) {
320 			static if (case_sensitive) csum ^= *pc;
321 			else csum ^= *pc & 0x1101_1111;
322 			csum = (csum << 1) | (csum >> 31);
323 		}
324 		return csum;
325 	}
326 	
327 	private static uint computeCheckSumI(T)(ref T obj)
328 	@trusted {
329 		return cast(uint)typeid(obj).getHash(&obj);
330 	}
331 }
332 
333 private:
334 
335 void removeFromArrayIdx(T)(ref T[] array, size_t idx)
336 {
337 	import std.algorithm : swap;
338 	foreach( j; idx+1 .. array.length) { 
339 		array[j-1] = array[j];
340 	}
341 	array[array.length-1].destroy();
342 	array = array.ptr[0 .. array.length-1];
343 }
344 
345 /// Special version of icmp() with optimization for ASCII characters
346 int icmp2(in string a, in string b)
347 @safe pure {
348 	import std.algorithm : min;
349 	import std.utf : decode;
350 	size_t i = 0, j = 0;
351 	
352 	// fast skip equal prefix
353 	size_t min_len = min(a.length, b.length);
354 	while ( i < min_len && a[i] == b[i] ) i++;
355 	if( i > 0 && (a[i-1] & 0x80) ) i--; // don't stop half-way in a UTF-8 sequence
356 	j = i;
357 	
358 	// compare the differing character and the rest of the string
359 	while (i < a.length && j < b.length){
360 		uint ac = cast(uint)a[i];
361 		uint bc = cast(uint)b[j];
362 		if( !((ac | bc) & 0x80) ){
363 			i++;
364 			j++;
365 			if( ac >= 'A' && ac <= 'Z' ) ac += 'a' - 'A';
366 			if( bc >= 'A' && bc <= 'Z' ) bc += 'a' - 'A';
367 			if( ac < bc ) return -1;
368 			else if( ac > bc ) return 1;
369 		} else {
370 			dchar acp = decode(a, i);
371 			dchar bcp = decode(b, j);
372 			if( acp != bcp ){
373 				acp = std.uni.toLower(acp);
374 				bcp = std.uni.toLower(bcp);
375 				if( acp < bcp ) return -1;
376 				else if( acp > bcp ) return 1;
377 			}
378 		}
379 	}
380 	
381 	if( i < a.length ) return 1;
382 	else if( j < b.length ) return -1;
383 	
384 	assert(i == a.length || j == b.length, "Strings equal but we didn't fully compare them!?");
385 	return 0;
386 }