summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRoman Smrž <roman.smrz@seznam.cz>2022-10-07 13:41:11 +0200
committerRoman Smrž <roman.smrz@seznam.cz>2022-10-07 18:27:33 +0200
commit409e9dc95aa9f17770f21d11a65ec839da699f16 (patch)
tree47799d2cca2d5a9fc4d9325ab4ffc6f3aaa47cee
parentbc688a7abf7f0d04429885084ed14bdf32b2d087 (diff)
Regex expansion
-rw-r--r--src/Main.hs4
-rw-r--r--src/Parser.hs26
-rw-r--r--src/Test.hs48
3 files changed, 47 insertions, 31 deletions
diff --git a/src/Main.hs b/src/Main.hs
index b19796a..28b88ae 100644
--- a/src/Main.hs
+++ b/src/Main.hs
@@ -14,8 +14,6 @@ import qualified Data.Text as T
import qualified Data.Text.IO as T
import Text.Read (readMaybe)
-import Text.Regex.TDFA
-import Text.Regex.TDFA.Text
import System.Console.GetOpt
import System.Directory
@@ -238,7 +236,7 @@ spawnOn target pname killWith cmd = do
return process
tryMatch :: Regex -> [Text] -> Maybe ((Text, [Text]), [Text])
-tryMatch re (x:xs) | Right (Just (_, _, _, capture)) <- regexec re x = Just ((x, capture), xs)
+tryMatch re (x:xs) | Right (Just (_, _, _, capture)) <- regexMatch re x = Just ((x, capture), xs)
| otherwise = fmap (x:) <$> tryMatch re xs
tryMatch _ [] = Nothing
diff --git a/src/Parser.hs b/src/Parser.hs
index 35f28c5..22928c3 100644
--- a/src/Parser.hs
+++ b/src/Parser.hs
@@ -132,17 +132,14 @@ someExpansion = do
, between (char '{') (char '}') someExpr
]
-stringExpansion :: Text -> TestParser (Expr Text)
-stringExpansion tname = do
+stringExpansion :: ExprType a => Text -> (forall b. ExprType b => Expr b -> [Maybe (Expr a)]) -> TestParser (Expr a)
+stringExpansion tname conv = do
off <- stateOffset <$> getParserState
SomeExpr e <- someExpansion
let err = parseError $ FancyError off $ S.singleton $ ErrorFail $ T.unpack $ T.concat
[ tname, T.pack " expansion not defined for '", textExprType e, T.pack "'" ]
- maybe err return $ listToMaybe $ catMaybes
- [ cast e
- , UnOp (T.pack . show @Integer) <$> cast e
- ]
+ maybe err return $ listToMaybe $ catMaybes $ conv e
integerLiteral :: TestParser (Expr Integer)
integerLiteral = Literal . read . TL.unpack <$> takeWhile1P (Just "integer") isDigit
@@ -163,7 +160,10 @@ quotedString = label "string" $ lexeme $ do
, char 't' >> return '\t'
]
(Literal (T.singleton c) :) <$> inner
- ,do e <- stringExpansion (T.pack "string")
+ ,do e <- stringExpansion (T.pack "string") $ \e ->
+ [ cast e
+ , UnOp (T.pack . show @Integer) <$> cast e
+ ]
(e:) <$> inner
]
Concat <$> inner
@@ -173,14 +173,18 @@ regex = label "regular expression" $ lexeme $ do
void $ char '/'
let inner = choice
[ char '/' >> return []
- , takeWhile1P Nothing (`notElem` "/\\$") >>= \s -> (Literal (TL.toStrict s) :) <$> inner
+ , takeWhile1P Nothing (`notElem` "/\\$") >>= \s -> (Literal (RegexPart (TL.toStrict s)) :) <$> inner
,do void $ char '\\'
s <- choice
- [ char '/' >> return (Literal $ T.singleton '/')
- , anySingle >>= \c -> return (Literal $ T.pack ['\\', c])
+ [ char '/' >> return (Literal $ RegexPart $ T.singleton '/')
+ , anySingle >>= \c -> return (Literal $ RegexPart $ T.pack ['\\', c])
]
(s:) <$> inner
- ,do e <- stringExpansion (T.pack "regex")
+ ,do e <- stringExpansion (T.pack "regex") $ \e ->
+ [ cast e
+ , UnOp RegexString <$> cast e
+ , UnOp (RegexString . T.pack . show @Integer) <$> cast e
+ ]
(e:) <$> inner
]
expr <- Regex <$> inner
diff --git a/src/Test.hs b/src/Test.hs
index 9175589..cfc144b 100644
--- a/src/Test.hs
+++ b/src/Test.hs
@@ -9,10 +9,9 @@ module Test (
SomeVarValue(..), fromSomeVarValue, textSomeVarValue,
RecordSelector(..),
Expr(..), eval, gatherVars,
- Regex,
-) where
-import Control.Monad
+ Regex(RegexPart, RegexString), regexMatch,
+) where
import Data.Char
import Data.List
@@ -20,8 +19,8 @@ import Data.Text (Text)
import qualified Data.Text as T
import Data.Typeable
-import Text.Regex.TDFA
-import Text.Regex.TDFA.Text
+import Text.Regex.TDFA qualified as RE
+import Text.Regex.TDFA.Text qualified as RE
import {-# SOURCE #-} Network
import {-# SOURCE #-} Process
@@ -86,7 +85,7 @@ instance ExprType Text where
instance ExprType Regex where
textExprType _ = T.pack "regex"
textExprValue _ = T.pack "<regex>"
- emptyVarValue = either error id $ compile defaultCompOpt defaultExecOpt T.empty
+ emptyVarValue = either error id $ regexCompile T.empty
data SomeVarValue = forall a. ExprType a => SomeVarValue a
@@ -104,7 +103,7 @@ data Expr a where
Variable :: ExprType a => VarName -> Expr a
Literal :: ExprType a => a -> Expr a
Concat :: [Expr Text] -> Expr Text
- Regex :: [Expr Text] -> Expr Regex
+ Regex :: [Expr Regex] -> Expr Regex
UnOp :: (b -> a) -> Expr b -> Expr a
BinOp :: (b -> c -> a) -> Expr b -> Expr c -> Expr a
@@ -112,16 +111,9 @@ eval :: MonadEval m => Expr a -> m a
eval (Variable name) = fromSomeVarValue name =<< lookupVar name
eval (Literal value) = return value
eval (Concat xs) = T.concat <$> mapM eval xs
-eval (Regex xs) = do
- parts <- forM xs $ \case
- Literal value | Just str <- cast value -> return str
- | otherwise -> fail $ "regex expansion not defined for " ++ T.unpack (textExprType $ Just value)
- expr -> T.concatMap escapeChar <$> eval expr
- where
- escapeChar c | isAlphaNum c = T.singleton c
- | c `elem` "`'<>" = T.singleton c
- | otherwise = T.pack ['\\', c]
- case compile defaultCompOpt defaultExecOpt $ T.concat $ concat [[T.singleton '^'], parts, [T.singleton '$']] of
+eval (Regex xs) = mapM eval xs >>= \case
+ [re@RegexCompiled {}] -> return re
+ parts -> case regexCompile $ T.concat $ map regexSource parts of
Left err -> fail err
Right re -> return re
eval (UnOp f x) = f <$> eval x
@@ -137,3 +129,25 @@ gatherVars = fmap (uniqOn fst . sortOn fst) . helper
helper (Regex es) = concat <$> mapM helper es
helper (UnOp _ e) = helper e
helper (BinOp _ e f) = (++) <$> helper e <*> helper f
+
+
+data Regex = RegexCompiled Text RE.Regex
+ | RegexPart Text
+ | RegexString Text
+
+regexCompile :: Text -> Either String Regex
+regexCompile src = either Left (Right . RegexCompiled src) $ RE.compile RE.defaultCompOpt RE.defaultExecOpt $
+ T.singleton '^' <> src <> T.singleton '$'
+
+regexMatch :: Regex -> Text -> Either String (Maybe (Text, Text, Text, [Text]))
+regexMatch (RegexCompiled _ re) text = RE.regexec re text
+regexMatch _ _ = Left "regex not compiled"
+
+regexSource :: Regex -> Text
+regexSource (RegexCompiled src _) = src
+regexSource (RegexPart src) = src
+regexSource (RegexString str) = T.concatMap escapeChar str
+ where
+ escapeChar c | isAlphaNum c = T.singleton c
+ | c `elem` "`'<>" = T.singleton c
+ | otherwise = T.pack ['\\', c]