diff options
author | Roman Smrž <roman.smrz@seznam.cz> | 2022-10-07 13:41:11 +0200 |
---|---|---|
committer | Roman Smrž <roman.smrz@seznam.cz> | 2022-10-07 18:27:33 +0200 |
commit | 409e9dc95aa9f17770f21d11a65ec839da699f16 (patch) | |
tree | 47799d2cca2d5a9fc4d9325ab4ffc6f3aaa47cee | |
parent | bc688a7abf7f0d04429885084ed14bdf32b2d087 (diff) |
Regex expansion
-rw-r--r-- | src/Main.hs | 4 | ||||
-rw-r--r-- | src/Parser.hs | 26 | ||||
-rw-r--r-- | src/Test.hs | 48 |
3 files changed, 47 insertions, 31 deletions
diff --git a/src/Main.hs b/src/Main.hs index b19796a..28b88ae 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -14,8 +14,6 @@ import qualified Data.Text as T import qualified Data.Text.IO as T import Text.Read (readMaybe) -import Text.Regex.TDFA -import Text.Regex.TDFA.Text import System.Console.GetOpt import System.Directory @@ -238,7 +236,7 @@ spawnOn target pname killWith cmd = do return process tryMatch :: Regex -> [Text] -> Maybe ((Text, [Text]), [Text]) -tryMatch re (x:xs) | Right (Just (_, _, _, capture)) <- regexec re x = Just ((x, capture), xs) +tryMatch re (x:xs) | Right (Just (_, _, _, capture)) <- regexMatch re x = Just ((x, capture), xs) | otherwise = fmap (x:) <$> tryMatch re xs tryMatch _ [] = Nothing diff --git a/src/Parser.hs b/src/Parser.hs index 35f28c5..22928c3 100644 --- a/src/Parser.hs +++ b/src/Parser.hs @@ -132,17 +132,14 @@ someExpansion = do , between (char '{') (char '}') someExpr ] -stringExpansion :: Text -> TestParser (Expr Text) -stringExpansion tname = do +stringExpansion :: ExprType a => Text -> (forall b. ExprType b => Expr b -> [Maybe (Expr a)]) -> TestParser (Expr a) +stringExpansion tname conv = do off <- stateOffset <$> getParserState SomeExpr e <- someExpansion let err = parseError $ FancyError off $ S.singleton $ ErrorFail $ T.unpack $ T.concat [ tname, T.pack " expansion not defined for '", textExprType e, T.pack "'" ] - maybe err return $ listToMaybe $ catMaybes - [ cast e - , UnOp (T.pack . show @Integer) <$> cast e - ] + maybe err return $ listToMaybe $ catMaybes $ conv e integerLiteral :: TestParser (Expr Integer) integerLiteral = Literal . read . TL.unpack <$> takeWhile1P (Just "integer") isDigit @@ -163,7 +160,10 @@ quotedString = label "string" $ lexeme $ do , char 't' >> return '\t' ] (Literal (T.singleton c) :) <$> inner - ,do e <- stringExpansion (T.pack "string") + ,do e <- stringExpansion (T.pack "string") $ \e -> + [ cast e + , UnOp (T.pack . show @Integer) <$> cast e + ] (e:) <$> inner ] Concat <$> inner @@ -173,14 +173,18 @@ regex = label "regular expression" $ lexeme $ do void $ char '/' let inner = choice [ char '/' >> return [] - , takeWhile1P Nothing (`notElem` "/\\$") >>= \s -> (Literal (TL.toStrict s) :) <$> inner + , takeWhile1P Nothing (`notElem` "/\\$") >>= \s -> (Literal (RegexPart (TL.toStrict s)) :) <$> inner ,do void $ char '\\' s <- choice - [ char '/' >> return (Literal $ T.singleton '/') - , anySingle >>= \c -> return (Literal $ T.pack ['\\', c]) + [ char '/' >> return (Literal $ RegexPart $ T.singleton '/') + , anySingle >>= \c -> return (Literal $ RegexPart $ T.pack ['\\', c]) ] (s:) <$> inner - ,do e <- stringExpansion (T.pack "regex") + ,do e <- stringExpansion (T.pack "regex") $ \e -> + [ cast e + , UnOp RegexString <$> cast e + , UnOp (RegexString . T.pack . show @Integer) <$> cast e + ] (e:) <$> inner ] expr <- Regex <$> inner diff --git a/src/Test.hs b/src/Test.hs index 9175589..cfc144b 100644 --- a/src/Test.hs +++ b/src/Test.hs @@ -9,10 +9,9 @@ module Test ( SomeVarValue(..), fromSomeVarValue, textSomeVarValue, RecordSelector(..), Expr(..), eval, gatherVars, - Regex, -) where -import Control.Monad + Regex(RegexPart, RegexString), regexMatch, +) where import Data.Char import Data.List @@ -20,8 +19,8 @@ import Data.Text (Text) import qualified Data.Text as T import Data.Typeable -import Text.Regex.TDFA -import Text.Regex.TDFA.Text +import Text.Regex.TDFA qualified as RE +import Text.Regex.TDFA.Text qualified as RE import {-# SOURCE #-} Network import {-# SOURCE #-} Process @@ -86,7 +85,7 @@ instance ExprType Text where instance ExprType Regex where textExprType _ = T.pack "regex" textExprValue _ = T.pack "<regex>" - emptyVarValue = either error id $ compile defaultCompOpt defaultExecOpt T.empty + emptyVarValue = either error id $ regexCompile T.empty data SomeVarValue = forall a. ExprType a => SomeVarValue a @@ -104,7 +103,7 @@ data Expr a where Variable :: ExprType a => VarName -> Expr a Literal :: ExprType a => a -> Expr a Concat :: [Expr Text] -> Expr Text - Regex :: [Expr Text] -> Expr Regex + Regex :: [Expr Regex] -> Expr Regex UnOp :: (b -> a) -> Expr b -> Expr a BinOp :: (b -> c -> a) -> Expr b -> Expr c -> Expr a @@ -112,16 +111,9 @@ eval :: MonadEval m => Expr a -> m a eval (Variable name) = fromSomeVarValue name =<< lookupVar name eval (Literal value) = return value eval (Concat xs) = T.concat <$> mapM eval xs -eval (Regex xs) = do - parts <- forM xs $ \case - Literal value | Just str <- cast value -> return str - | otherwise -> fail $ "regex expansion not defined for " ++ T.unpack (textExprType $ Just value) - expr -> T.concatMap escapeChar <$> eval expr - where - escapeChar c | isAlphaNum c = T.singleton c - | c `elem` "`'<>" = T.singleton c - | otherwise = T.pack ['\\', c] - case compile defaultCompOpt defaultExecOpt $ T.concat $ concat [[T.singleton '^'], parts, [T.singleton '$']] of +eval (Regex xs) = mapM eval xs >>= \case + [re@RegexCompiled {}] -> return re + parts -> case regexCompile $ T.concat $ map regexSource parts of Left err -> fail err Right re -> return re eval (UnOp f x) = f <$> eval x @@ -137,3 +129,25 @@ gatherVars = fmap (uniqOn fst . sortOn fst) . helper helper (Regex es) = concat <$> mapM helper es helper (UnOp _ e) = helper e helper (BinOp _ e f) = (++) <$> helper e <*> helper f + + +data Regex = RegexCompiled Text RE.Regex + | RegexPart Text + | RegexString Text + +regexCompile :: Text -> Either String Regex +regexCompile src = either Left (Right . RegexCompiled src) $ RE.compile RE.defaultCompOpt RE.defaultExecOpt $ + T.singleton '^' <> src <> T.singleton '$' + +regexMatch :: Regex -> Text -> Either String (Maybe (Text, Text, Text, [Text])) +regexMatch (RegexCompiled _ re) text = RE.regexec re text +regexMatch _ _ = Left "regex not compiled" + +regexSource :: Regex -> Text +regexSource (RegexCompiled src _) = src +regexSource (RegexPart src) = src +regexSource (RegexString str) = T.concatMap escapeChar str + where + escapeChar c | isAlphaNum c = T.singleton c + | c `elem` "`'<>" = T.singleton c + | otherwise = T.pack ['\\', c] |