mwaterfall/MWFeedParser

Problem (and solution) handling 32bit unicode entities

kshepherd2013 opened this issue · 0 comments

If you try this, it does not substitute the entities:
NSString * example = @"Title &#x1F693;&#x1F68C;&#x1F692;&#x1F696;";
NSString * result = [example stringByDecodingHTMLEntities];

Here is the fix (GTMNSString+HTML.m from line 481):
// Decode a hexadecimal numeric character reference whose digits are in
// `hexSequence` and replace `escapeRange` of `finalString` with the decoded
// character(s).
//
// Code points up to U+FFFF fit in a single UTF-16 code unit; code points in
// U+10000...U+10FFFF are written as a surrogate pair. Anything above U+10FFFF
// is not a Unicode code point, so the escape is left untouched.
NSScanner *scanner = [NSScanner scannerWithString:hexSequence];
unsigned long long value;
// NOTE(review): the `length - 4` check presumably verifies that the scanner
// consumed every hex digit of the escape; `length` is defined outside this
// excerpt, so confirm against the enclosing method.
if ([scanner scanHexLongLong:&value] && [scanner scanLocation] == length - 4) {
    if (value <= 0xFFFF) {
        // Basic Multilingual Plane: one code unit. The bound is inclusive so
        // that U+FFFF is handled here and never reaches the subtraction
        // below, where it would underflow.
        unichar uchar = (unichar)value;
        NSString *charString = [NSString stringWithCharacters:&uchar length:1];
        [finalString replaceCharactersInRange:escapeRange withString:charString];
    } else if (value <= 0x10FFFF) {
        // Supplementary planes: subtract 0x10000 to get a 20-bit offset, then
        // split it into two 10-bit halves. The upper bound guarantees the
        // high half fits in 10 bits.
        unsigned long long offset = value - 0x10000;
        unichar surrogatePair[2];
        surrogatePair[0] = (unichar)(0xD800 + (offset >> 10));   // top 10 bits
        surrogatePair[1] = (unichar)(0xDC00 + (offset & 0x3FF)); // low 10 bits
        NSString *charString = [NSString stringWithCharacters:surrogatePair length:2];
        [finalString replaceCharactersInRange:escapeRange withString:charString];
    }
    // NOTE: the enclosing `if` is left open here exactly as in the original
    // excerpt, which ends before its closing brace.