summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRoman Smrž <roman.smrz@seznam.cz>2025-10-09 22:46:39 +0200
committerRoman Smrž <roman.smrz@seznam.cz>2025-10-11 00:13:25 +0200
commit804804d41814f1f2309e2f31b672074a79dfb2dd (patch)
tree8f7614a1612af990dd3fab125244725f3aa43ea3
parent818688ab0242a5d4329f9a690e6c119e4b3d34da (diff)
Rewrite record parsing to be more strict
-rw-r--r--src/Erebos/Object/Internal.hs80
-rw-r--r--test/storage.et15
2 files changed, 58 insertions, 37 deletions
diff --git a/src/Erebos/Object/Internal.hs b/src/Erebos/Object/Internal.hs
index 4604f13..dc48973 100644
--- a/src/Erebos/Object/Internal.hs
+++ b/src/Erebos/Object/Internal.hs
@@ -240,45 +240,55 @@ unsafeDeserializeObject st bytes =
| otype == BC.pack "blob"
-> return $ Blob content
| otype == BC.pack "rec"
- , Just ritems <- sequence $ map parseRecLine $ mergeCont [] $ BC.lines content
+ , Just ritems <- parseRecordBody st content
-> return $ Rec ritems
| otherwise
-> return $ UnknownObject otype content
_ -> throwOtherError $ "malformed object"
- where splitObjPrefix line = do
- [otype, tlen] <- return $ BLC.words line
- (len, rest) <- BLC.readInt tlen
- guard $ BL.null rest
- return (BL.toStrict otype, len)
-
- mergeCont cs (a:b:rest) | Just ('\t', b') <- BC.uncons b = mergeCont (b':BC.pack "\n":cs) (a:rest)
- mergeCont cs (a:rest) = B.concat (a : reverse cs) : mergeCont [] rest
- mergeCont _ [] = []
-
- parseRecLine line = do
- colon <- BC.elemIndex ':' line
- space <- BC.elemIndex ' ' line
- guard $ colon < space
- let name = B.take colon line
- itype = B.take (space-colon-1) $ B.drop (colon+1) line
- content = B.drop (space+1) line
-
- let val = fromMaybe (RecUnknown itype content) $
- case BC.unpack itype of
- "e" -> do guard $ B.null content
- return RecEmpty
- "i" -> do (num, rest) <- BC.readInteger content
- guard $ B.null rest
- return $ RecInt num
- "n" -> RecNum <$> parseRatio content
- "t" -> return $ RecText $ decodeUtf8With lenientDecode content
- "b" -> RecBinary <$> readHex content
- "d" -> RecDate <$> parseTimeM False defaultTimeLocale "%s %z" (BC.unpack content)
- "u" -> RecUUID <$> U.fromASCIIBytes content
- "r" -> RecRef . Ref st <$> readRefDigest content
- "w" -> RecWeak <$> readRefDigest content
- _ -> Nothing
- return (name, val)
+ where
+ splitObjPrefix line = do
+ [ otype, tlen ] <- return $ BLC.words line
+ ( len, rest ) <- BLC.readInt tlen
+ guard $ BL.null rest
+ return ( BL.toStrict otype, len )
+
+parseRecordBody :: Storage' c -> ByteString -> Maybe [ ( ByteString, RecItem' c ) ]
+parseRecordBody _ body | B.null body = Just []
+parseRecordBody st body = do
+ colon <- BC.elemIndex ':' body
+ space <- BC.elemIndex ' ' $ B.drop (colon + 1) body
+ let name = B.take colon body
+ itype = B.take space $ B.drop (colon + 1) body
+ ( content, remainingBody ) <- parseTabEscapedLines $ B.drop (space + colon + 2) body
+
+ let val = fromMaybe (RecUnknown itype content) $
+ case BC.unpack itype of
+ "e" -> do guard $ B.null content
+ return RecEmpty
+ "i" -> do ( num, rest ) <- BC.readInteger content
+ guard $ B.null rest
+ return $ RecInt num
+ "n" -> RecNum <$> parseRatio content
+ "t" -> return $ RecText $ decodeUtf8With lenientDecode content
+ "b" -> RecBinary <$> readHex content
+ "d" -> RecDate <$> parseTimeM False defaultTimeLocale "%s %z" (BC.unpack content)
+ "u" -> RecUUID <$> U.fromASCIIBytes content
+ "r" -> RecRef . Ref st <$> readRefDigest content
+ "w" -> RecWeak <$> readRefDigest content
+ _ -> Nothing
+ (( name, val ) :) <$> parseRecordBody st remainingBody
+
+-- Split given ByteString on the first newline not preceded by tab; replace
+-- "\t\n" in the first part with "\n".
+parseTabEscapedLines :: ByteString -> Maybe ( ByteString, ByteString )
+parseTabEscapedLines = parseLines []
+ where
+ parseLines linesReversed cur = do
+ newline <- BC.elemIndex '\n' cur
+ case BC.indexMaybe cur (newline + 1) of
+ Just '\t' -> parseLines (B.take (newline + 1) cur : linesReversed) (B.drop (newline + 2) cur)
+ _ -> Just ( BC.concat $ reverse $ B.take newline cur : linesReversed, B.drop (newline + 1) cur )
+
deserializeObject :: PartialStorage -> BL.ByteString -> Except ErebosError (PartialObject, BL.ByteString)
deserializeObject = unsafeDeserializeObject
diff --git a/test/storage.et b/test/storage.et
index 035a64b..1510c38 100644
--- a/test/storage.et
+++ b/test/storage.et
@@ -558,7 +558,18 @@ test ObjectFormat:
# Small record
local:
send "store-raw EOF"
- send "rec 8\nnum:n 1\n"
+ send "rec 28\nnum:n 1\ntext:t abc\nempty:e \n"
+ send "EOF"
+ expect /store-done ($refpat)/ capture r
+
+ send "load-type $r"
+ expect /load-type (.*)/ capture type
+ guard (type == "rec")
+
+ # Record with multiline items
+ local:
+ send "store-raw EOF"
+ send "rec 34\nfirst:t abc\n\tdef\nsecond:t \tx\n\ty\tz\n"
send "EOF"
expect /store-done ($refpat)/ capture r
@@ -578,7 +589,7 @@ test ObjectFormat:
guard (type == "rec")
# Invalid records
- for content in [ "rec 6\nnum 1\n", "rec 6\nnum:n\n" ]:
+ for content in [ "rec 6\nnum 1\n", "rec 6\nnum:n\n", "rec 7\nnum:n 1" ]:
send "store-raw EOF"
send "$content"
send "EOF"