1 /**
2     Defines a string based dictionary list with conserved insertion order.
3 
4     Copyright: © 2012-2014 RejectedSoftware e.K.
5     License: Subject to the terms of the MIT license, as written in the included LICENSE file.
6     Authors: Sönke Ludwig
7 */
8 module memutils.dictionarylist;
9 
10 import core.stdc..string : memset, memcpy;
11 import memutils.helpers;
12 import memutils.allocators;
13 import memutils.refcounted;
14 import memutils.utils;
15 import memutils.vector;
16 import std.conv : to;
17 import std.exception : enforce;
18 
19 alias DictionaryListRef(KEY, VALUE, ALLOC = ThreadMem, bool case_sensitive = true, size_t NUM_STATIC_FIELDS = 8) = RefCounted!(DictionaryList!(KEY, VALUE, ALLOC, case_sensitive, NUM_STATIC_FIELDS), ALLOC);
20 
21 /**
22  * 
23     Behaves similar to $(D VALUE[string]) but the insertion order is not changed
24     and multiple values per key are supported.
25     
26     Note that despite case not being relevant for matching keys, iterating
27     over the list will yield the original case of the key that was put in.
28 
29     Insertion and lookup has O(n) complexity.
30 */
31 struct DictionaryList(KEY, VALUE, ALLOC = ThreadMem, bool case_sensitive = true, size_t NUM_STATIC_FIELDS = 8) {
32 	@disable this(this);
33 
34 	import std.typecons : Tuple;
35 	
36 	private {
37 		static struct Field { uint keyCheckSum; KEY key; VALUE value; }
38 		Field[NUM_STATIC_FIELDS] m_fields;
39 		size_t m_fieldCount;
40 		Field[] m_extendedFields;
41 		size_t m_extendedFieldCount;
42 	}
43 
44 	~this() {
45 		if (m_extendedFields) {
46 			auto sz = m_extendedFields.length;
47 			freeArray!(Field, ALLOC)(m_extendedFields.ptr[0 .. m_extendedFieldCount], sz);
48 		}
49 	}
50 	
51 	alias KeyType = KEY;
52 	alias ValueType = VALUE;
53 	
54 	struct FieldTuple { KeyType key; ValueType value; }
55 
56 	@property bool empty() const { return length == 0; }
57 
58 	/** The number of fields present in the map.
59     */
60 	@property size_t length() const { return m_fieldCount + m_extendedFields.length; }
61 	
62 	/** Removes the first field that matches the given key.
63     */
64 	void remove(KeyType key)
65 	{
66 		auto keysum = computeCheckSumI(key);
67 		auto idx = getIndex(m_fields[0 .. m_fieldCount], key, keysum);
68 		if( idx >= 0 ){
69 			auto slice = m_fields[0 .. m_fieldCount];
70 			removeFromArrayIdx(slice, idx);
71 			m_fieldCount--;
72 		} else {
73 			idx = getIndex(m_extendedFields, key, keysum);
74 			enforce(idx >= 0);
75 			removeFromArrayIdx(m_extendedFields, idx);
76 		}
77 	}
78 	
79 	/** Removes all fields that matches the given key.
80     */
81 	void removeAll(KeyType key)
82 	{
83 		auto keysum = computeCheckSumI(key);
84 		for (size_t i = 0; i < m_fieldCount;) {
85 			if (m_fields[i].keyCheckSum == keysum && matches(m_fields[i].key, key)) {
86 				auto slice = m_fields[0 .. m_fieldCount];
87 				removeFromArrayIdx(slice, i);
88 				m_fieldCount--;
89 			} else i++;
90 		}
91 		
92 		for (size_t i = 0; i < m_extendedFields.length;) {
93 			if (m_extendedFields[i].keyCheckSum == keysum && matches(m_extendedFields[i].key, key))
94 				removeFromArrayIdx(m_extendedFields, i);
95 			else i++;
96 		}
97 	}
98 	
99 	/** Adds a new field to the map.
100 
101         The new field will be added regardless of any existing fields that
102         have the same key, possibly resulting in duplicates. Use opIndexAssign
103         if you want to avoid duplicates.
104     */
105 	void insert()(auto const ref KeyType key, ValueType value)
106 	{
107 		auto keysum = computeCheckSumI(key);
108 		if (m_fieldCount < m_fields.length) {
109 			m_fields[m_fieldCount++] = Field(keysum, *cast(KeyType*) &key, value);
110 		}
111 		else {
112 			grow(1);
113 			m_extendedFields[$-1] = Field(keysum, *cast(KeyType*) &key, value);
114 		}
115 	}
116 	
117 	/** Returns the first field that matches the given key.
118 
119         If no field is found, def_val is returned.
120     */
121 	ValueType get(KeyType key, lazy ValueType def_val = ValueType.init) {
122 		if (auto pv = key in this) return *pv;
123 		return def_val;
124 	}
125 	
126 	const(ValueType) get(in KeyType key, lazy const(ValueType) def_val = const(ValueType).init) const
127 	{
128 		if (auto pv = key in this) return *pv;
129 		return def_val;
130 	}
131 	
132 	/** Returns all values matching the given key.
133 
134         Note that the version returning an array will allocate using the same allocator for each call.
135     */
136 	Vector!(ValueType, ALLOC) getValuesAt()(auto const ref KeyType key)
137 	const {
138 		import std.array;
139 		auto ret = Vector!(ValueType, ALLOC)(0);
140 		this.opApply( (k, const ref v) {
141 				// static if (is(ValueType == string)) logTrace("Checking field: ", v);
142 				//logTrace("Looping ", k, " => ", v);
143 				if (matches(key, k)) {
144 					//logTrace("Appending: ", v);
145 					ret ~= v;
146 				}
147 				return 0;
148 			});
149 		//logTrace("Finished getValuesAt with: ", ret[]);
150 		return ret.move();
151 	}
152 	
153 	/// ditto
154 	void getValuesAt(in KeyType key, scope void delegate(const(ValueType)) del)
155 	const {
156 		uint keysum = computeCheckSumI(key);
157 		foreach (ref f; m_fields[0 .. m_fieldCount]) {
158 			if (f == Field.init) continue;
159 			if (f.keyCheckSum != keysum) continue;
160 			if (matches(f.key, key)) del(f.value);
161 		}
162 		foreach (ref f; m_extendedFields) {
163 			if (f.keyCheckSum != keysum) continue;
164 			if (matches(f.key, key)) del(f.value);
165 		}
166 	}
167 	/** Returns the first value matching the given key.
168     */
169 	inout(ValueType) opIndex(KeyType key)
170 	inout {
171 		auto pitm = key in this;
172 		enforce(pitm !is null, "Accessing non-existent key '"~ key.to!string ~"'.");
173 		return *pitm;
174 	}
175 	
176 	/** Adds or replaces the given field with a new value.
177     */
178 	ValueType opIndexAssign(ValueType val, KeyType key)
179 	{
180 		auto pitm = key in this;
181 		if( pitm ) *pitm = val;
182 		else if( m_fieldCount < m_fields.length ) m_fields[m_fieldCount++] = Field(computeCheckSumI(key), key, val);
183 		else {
184 			grow(1);
185 			m_extendedFields[$-1] = Field(computeCheckSumI(key), key, val);
186 		}
187 		return val;
188 	}
189 	
190 	/** Returns a pointer to the first field that matches the given key.
191     */
192 	inout(ValueType)* opBinaryRight(string op)(in KeyType key) inout if(op == "in") {
193 		return find(key);
194 	}
195 	
196 	inout(ValueType)* find(in KeyType key) inout 
197 	{
198 		uint keysum = computeCheckSumI(key);
199 		auto idx = getIndex(m_fields[0 .. m_fieldCount], key, keysum);
200 		if( idx >= 0 ) return &m_fields[idx].value;
201 		idx = getIndex(m_extendedFields, key, keysum);
202 		if( idx >= 0 ) return &m_extendedFields[idx].value;
203 		return null;
204 	}
205 	
206 	/// ditto
207 	bool opBinaryRight(string op)(KeyType key) inout if(op == "!in") {
208 		return !(key in this);
209 	}
210 	
211 	/** Iterates over all fields, including duplicates.
212     */
213 	int opApply(int delegate(KeyType key, ref ValueType val) del)
214 	{
215 		foreach (ref kv; m_fields[0 .. m_fieldCount]) {
216 			if (kv == Field.init) return 0;
217 			if (auto ret = del(kv.key, kv.value))
218 				return ret;
219 		}
220 		foreach (ref kv; m_extendedFields) {
221 			if (auto ret = del(kv.key, kv.value))
222 				return ret;
223 		}
224 		return 0;
225 	}
226 	
227 	int opApply(int delegate(const ref KeyType key, const ref ValueType val) del) const
228 	{
229 		foreach (ref kv; m_fields[0 .. m_fieldCount]) {
230 			if (kv == Field.init) return 0;
231 			if (auto ret = del(kv.key, kv.value))
232 				return ret;
233 		}
234 		foreach (ref kv; m_extendedFields) {
235 			if (auto ret = del(kv.key, kv.value))
236 				return ret;
237 		}
238 		return 0;
239 	}
240 	
241 	/// ditto
242 	int opApply(int delegate(ref ValueType val) del)
243 	{
244 		return this.opApply((KeyType key, ref ValueType val) { return del(val); });
245 	}
246 	
247 	/// ditto
248 	int opApply(int delegate(KeyType key, ref const(ValueType) val) del) const
249 	{
250 		return (cast() this).opApply(cast(int delegate(KeyType, ref ValueType)) del);
251 	}
252 	
253 	/// ditto
254 	int opApply(int delegate(ref const(ValueType) val) del) const
255 	{
256 		return (cast() this).opApply(cast(int delegate(ref ValueType)) del);
257 	}
258 
259 	bool opEquals(const ref DictionaryList!(KEY, VALUE, ALLOC) other) const
260 	{
261 		foreach (const ref KeyType key, const ref ValueType val; this)
262 		{
263 			bool found;
264 			other.getValuesAt(key, (const ValueType oval) {
265 					if (oval == val) {
266 						found = true;
267 						return;
268 					}
269 				});
270 			if (!found)
271 				return false;
272 		}
273 		
274 		return true;
275 	}
276 
277 	private void grow(size_t n) {
278 		if (m_extendedFields.length + n < m_extendedFieldCount) {
279 			m_extendedFields = m_extendedFields.ptr[0 .. m_extendedFields.length + n];
280 			return;
281 		}
282 		if (m_extendedFields.length > 0)
283 		{
284 			size_t oldsz = m_extendedFields.length;
285 			m_extendedFields = m_extendedFields.ptr[0 .. m_extendedFieldCount];
286 			m_extendedFieldCount = (m_extendedFieldCount + n)*3/2;
287 			m_extendedFields = reallocArray!(Field, ALLOC)(m_extendedFields, m_extendedFieldCount)[0 .. oldsz + n];
288 			memset(m_extendedFields.ptr + oldsz, 0, (m_extendedFieldCount-oldsz)*Field.sizeof);
289 		}
290 		else {
291 			m_extendedFieldCount = 16;
292 			m_extendedFields = allocArray!(Field, ALLOC)(16).ptr[0 .. n];
293 			memset(m_extendedFields.ptr, 0, m_extendedFieldCount*Field.sizeof);
294 		}
295 	}
296 
297 	private ptrdiff_t getIndex(in Field[] map, in KeyType key, uint keysum)
298 	const {
299 		foreach (i, ref const(Field) entry; map) {
300 			if (entry.keyCheckSum != keysum) continue;
301 			if (matches(entry.key, key)) return i;
302 		}
303 		return -1;
304 	}
305 	
306 	private static bool matches(in KeyType a, in KeyType b)
307 	{
308 		static if (case_sensitive) return a == b;
309 		else static if (is (KeyType == string)) return icmp2(a, b) == 0;
310 		else return a == b;
311 	}
312 	
313 	// very simple check sum function with a good chance to match
314 	// strings with different case equal
315 	private static uint computeCheckSumI(string s)
316 	@trusted {
317 		uint csum = 0;
318 		immutable(char)* pc = s.ptr, pe = s.ptr + s.length;
319 		for (; pc != pe; pc++) {
320 			static if (case_sensitive) csum ^= *pc;
321 			else csum ^= *pc & 0x1101_1111;
322 			csum = (csum << 1) | (csum >> 31);
323 		}
324 		return csum;
325 	}
326 	
327 	private static uint computeCheckSumI(T)(ref T obj)
328 	@trusted {
329 		return cast(uint)typeid(obj).getHash(&obj);
330 	}
331 }
332 
333 private:
334 
335 void removeFromArrayIdx(T)(ref T[] array, size_t idx)
336 {
337 	import std.algorithm : swap;
338 	foreach( j; idx+1 .. array.length) { 
339 		array[j-1] = array[j];
340 	}
341 	array[array.length-1].destroy();
342 	array = array.ptr[0 .. array.length-1];
343 }
344 
345 /// Special version of icmp() with optimization for ASCII characters
346 int icmp2(in string a, in string b)
347 @safe pure {
348 	import std.algorithm : min;
349 	import std.utf : decode;
350 	import std.uni : toLower;
351 	size_t i = 0, j = 0;
352 	
353 	// fast skip equal prefix
354 	size_t min_len = min(a.length, b.length);
355 	while ( i < min_len && a[i] == b[i] ) i++;
356 	if( i > 0 && (a[i-1] & 0x80) ) i--; // don't stop half-way in a UTF-8 sequence
357 	j = i;
358 	
359 	// compare the differing character and the rest of the string
360 	while (i < a.length && j < b.length){
361 		uint ac = cast(uint)a[i];
362 		uint bc = cast(uint)b[j];
363 		if( !((ac | bc) & 0x80) ){
364 			i++;
365 			j++;
366 			if( ac >= 'A' && ac <= 'Z' ) ac += 'a' - 'A';
367 			if( bc >= 'A' && bc <= 'Z' ) bc += 'a' - 'A';
368 			if( ac < bc ) return -1;
369 			else if( ac > bc ) return 1;
370 		} else {
371 			dchar acp = decode(a, i);
372 			dchar bcp = decode(b, j);
373 			if( acp != bcp ){
374 				acp = toLower(acp);
375 				bcp = toLower(bcp);
376 				if( acp < bcp ) return -1;
377 				else if( acp > bcp ) return 1;
378 			}
379 		}
380 	}
381 	
382 	if( i < a.length ) return 1;
383 	else if( j < b.length ) return -1;
384 	
385 	assert(i == a.length || j == b.length, "Strings equal but we didn't fully compare them!?");
386 	return 0;
387 }